Initial pilot data were collected via Qualtrics surveys named
MAG_S2_PROLIFIC-DATACOLLAR_0 and
MAG_S2_PROLIFIC-DATACOLLAR_0, each presenting participants
with one common graph (“block 0”, STIMULUS = B0-0) and four
subsequent graphs in random order (“block 1”).
Subsequently we made minor alterations to the question wording and respose wording, and the remaining stimuli were organized in 6 blocks. Each block was collected via PROLIFIC using independent Qualtrics surveys, with the exception of a sample of respondents directly recruited from Tumblr, using a single Qualtrics surveys with randomization logic to assign each Tumblr participant to one of the six stimulus blocks.
Participants were excluded from the sample for the following reasons
Following wrangling, there should be 6 blocks of stimuli, each containing responses from 40 participants from prolific, and TODO responses directly from Tumblr.
############## IMPORT STIMULI FILE
df_stimuli <- read_csv("data/input/stimuli.csv", col_names = TRUE) %>%
mutate(
BLOCK = as.factor(BLOCK),
STIMULUS_CATEGORY = as.factor(CATEGORY),
ID = as.factor(ID),
MAKER_ID = as.factor(MAKER_ID)
)
############## BUILD LABELS
ref_stimuli <- levels(df_stimuli$ID)
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
"MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
"CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
"altruistic","does NOT share","untrustworthy",
"NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
"selfish", "DOES share", "trustworthy",
"very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
rm(left,right)
############## STUDY ID FILE
#most blocks were run as separate qualtrics surveys with diffferent recruitments in Prolific
#Tumblr was run with all blocks and randomization
df_studies <- read_csv("data/input/studies.csv", col_names = TRUE) %>%
mutate(
ID.Study = as.factor(ID.Study),
Assigned.Block = as.factor(Assigned.Block),
Distribution = as.factor(Distribution),
Prolific.Name = as.factor(Prolific.Name),
Qualtrics.URL = as.factor(Qualtrics.URL),
Qualtrics.Survey = as.factor(Qualtrics.Survey),
Sample = as.factor(Sample),
Scope = as.factor(Scope)
)
#1. IMPORT RAW DATA FILES ########################################################
#### RAW DATA ####################################################################
# will always be the unaltered version of imported data
# 1 row per subject
df_raw_datacollar <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_datacollarpilot_B1.csv", col_names = TRUE)
df_raw_bluecollar <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_bluecollarpilot_B1.csv", col_names = TRUE)
df_raw_b1 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B1.csv", col_names = TRUE)
df_raw_b2 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B2.csv", col_names = TRUE)
df_raw_b3 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B3.csv", col_names = TRUE)
df_raw_b4 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B4.csv", col_names = TRUE)
df_raw_b5 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B5.csv", col_names = TRUE)
df_raw_b6 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B6.csv", col_names = TRUE)
df_raw_tumblr_paid <- read_csv("data/input/CLEAN_MAG_S2_TUMBLR_PAID_ALL.csv", col_names = TRUE)
df_raw_tumblr_free <- read_csv("data/input/CLEAN_MAG_S2_TUMBLR_FREE_ALL.csv", col_names = TRUE)
# drop first two rows (qualtrics_specs)
df_raw_datacollar <- df_raw_datacollar[-c(1:2),]
df_raw_bluecollar <- df_raw_bluecollar[-c(1:2),]
df_raw_b1 <- df_raw_b1[-c(1:2),]
df_raw_b2 <- df_raw_b2[-c(1:2),]
df_raw_b3 <- df_raw_b3[-c(1:2),]
df_raw_b4 <- df_raw_b4[-c(1:2),]
df_raw_b5 <- df_raw_b5[-c(1:2),]
df_raw_b6 <- df_raw_b6[-c(1:2),]
df_raw_tumblr_paid <- df_raw_tumblr_paid[-c(1:2),]
df_raw_tumblr_free <- df_raw_tumblr_free[-c(1:2),]
# ADD DUMMY COLS TO DATA/BLUE COLLAR PILOT DATA
# necessary b/c pilot (block 1) did not have chart_action behavioural question
# x <- compare_df_cols(df_raw_b2, df_raw_pilot)
df_raw_pilot <- rbind(df_raw_datacollar, df_raw_bluecollar) %>%
mutate(
'0_Q_B0_CHART_ACTION' = NA,
'1_Q_B1_CHART_ACTION' = NA,
'1_Q_B2_CHART_ACTION' = NA,
'1_Q_B3_CHART_ACTION' = NA,
'1_Q_B4_CHART_ACTION' = NA,
'1_Q_B5_CHART_ACTION' = NA,
'1_Q_B6_CHART_ACTION' = NA,
'2_Q_B1_CHART_ACTION' = NA,
'2_Q_B2_CHART_ACTION' = NA,
'2_Q_B3_CHART_ACTION' = NA,
'2_Q_B4_CHART_ACTION' = NA,
'2_Q_B5_CHART_ACTION' = NA,
'2_Q_B6_CHART_ACTION' = NA,
'3_Q_B1_CHART_ACTION' = NA,
'3_Q_B2_CHART_ACTION' = NA,
'3_Q_B3_CHART_ACTION' = NA,
'3_Q_B4_CHART_ACTION' = NA,
'3_Q_B5_CHART_ACTION' = NA,
'3_Q_B6_CHART_ACTION' = NA,
'4_Q_B1_CHART_ACTION' = NA,
'4_Q_B2_CHART_ACTION' = NA,
'4_Q_B3_CHART_ACTION' = NA,
'4_Q_B4_CHART_ACTION' = NA,
'4_Q_B5_CHART_ACTION' = NA,
'4_Q_B6_CHART_ACTION' = NA
)
# BIND COLUMNS RAW prolific datasets
df_raw_prolific <- rbind(df_raw_pilot, df_raw_b1, df_raw_b2, df_raw_b3, df_raw_b4, df_raw_b5, df_raw_b6)
# RETROFIT SOME COLNAMES FOR COMPATIBILITY WITH TUMBLR
df_raw_prolific <- df_raw_prolific %>%
#drop T_BROWSER cols [these are blank]
select(-contains("T_BROWSER"), -T_EMAIL) %>%
mutate(
Q_RelevantIDDuplicate = NA,
Q_RelevantIDDuplicateScore = NA,
Q_RelevantIDFraudScore = NA,
Q_RelevantIDLastStartDate = NA,
RANDOM_BLOCK = NA
) %>% rename_with(
stringr::str_replace,
pattern = "P_BROWSER", replacement = "BROWSER",
) %>%
rename(
BROWSER_OS = `BROWSER_Operating System`
) %>% mutate_all(funs(str_replace(., "millenial", "millennial")))
# BIND COLUMNS RAW prolific datasets
df_raw_tumblr <- rbind(df_raw_tumblr_paid, df_raw_tumblr_free)
# RETROFIT SOME TUMBLR COLNAMES FOR MERGING WITH PROLIFIC
df_raw_tumblr <- df_raw_tumblr %>%
select( - t_example_question1, -t_example_question2, -D_email) %>%
mutate(
#add empty browser cols; didn't collect these in tumblr qualtrics
BROWSER_Browser = NA,
BROWSER_OS = NA,
BROWSER_Resolution = NA,
BROWSER_Version = NA,
ID_PROLIFIC = "TUMBLR",
ID_SESSION = "TUMBLR",
ID_STUDY = "TUMBLR"
) %>% mutate_all(funs(str_replace(., "millenial", "millennial")))
#dataframe column comparisons
# x <- janitor::compare_df_cols(df_raw_prolific, df_raw_tumblr)
# JOINT PROLIFIC AND TUMBLR DATAFRAMES
df_raw <- rbind(df_raw_prolific, df_raw_tumblr)
df_raw <- df_raw %>%
#reorder
select( RANDOM_BLOCK,
End_State,
StartDate:randomize_common,
ID_PROLIFIC:ID_SESSION,
D_gender:FEEDBACK,
PROLIFIC_PID: FL_14_DO,
Q_RelevantIDDuplicate: Q_RelevantIDLastStartDate,
`0_Q_B0_ENCOUNTER`: `0_Q_B0_CHART_LATENCY_Click Count`,
`0_Q_B0_CHART_ACTION`,
`1_Q_B1_loop-number` : `1_Q_B1_CHART_LATENCY_Click Count`,
`1_Q_B1_CHART_ACTION`,
`2_Q_B1_loop-number` : `2_Q_B1_CHART_LATENCY_Click Count`,
`2_Q_B1_CHART_ACTION`,
`3_Q_B1_loop-number` : `3_Q_B1_CHART_LATENCY_Click Count`,
`3_Q_B1_CHART_ACTION`,
`4_Q_B1_loop-number` : `4_Q_B1_CHART_LATENCY_Click Count`,
`4_Q_B1_CHART_ACTION`,
`1_Q_B2_loop-number` : `1_Q_B2_CHART_LATENCY_Click Count`,
`1_Q_B2_CHART_ACTION`,
`2_Q_B2_loop-number` : `2_Q_B2_CHART_LATENCY_Click Count`,
`2_Q_B2_CHART_ACTION`,
`3_Q_B2_loop-number` : `3_Q_B2_CHART_LATENCY_Click Count`,
`3_Q_B2_CHART_ACTION`,
`4_Q_B2_loop-number` : `4_Q_B2_CHART_LATENCY_Click Count`,
`4_Q_B2_CHART_ACTION`,
`1_Q_B3_loop-number` : `1_Q_B3_CHART_LATENCY_Click Count`,
`1_Q_B3_CHART_ACTION`,
`2_Q_B3_loop-number` : `2_Q_B3_CHART_LATENCY_Click Count`,
`2_Q_B3_CHART_ACTION`,
`3_Q_B3_loop-number` : `3_Q_B3_CHART_LATENCY_Click Count`,
`3_Q_B3_CHART_ACTION`,
`4_Q_B3_loop-number` : `4_Q_B3_CHART_LATENCY_Click Count`,
`4_Q_B3_CHART_ACTION`,
`1_Q_B4_loop-number` : `1_Q_B4_CHART_LATENCY_Click Count`,
`1_Q_B4_CHART_ACTION`,
`2_Q_B4_loop-number` : `2_Q_B4_CHART_LATENCY_Click Count`,
`2_Q_B4_CHART_ACTION`,
`3_Q_B4_loop-number` : `3_Q_B4_CHART_LATENCY_Click Count`,
`3_Q_B4_CHART_ACTION`,
`4_Q_B4_loop-number` : `4_Q_B4_CHART_LATENCY_Click Count`,
`4_Q_B4_CHART_ACTION`,
`1_Q_B5_loop-number` : `1_Q_B5_CHART_LATENCY_Click Count`,
`1_Q_B5_CHART_ACTION`,
`2_Q_B5_loop-number` : `2_Q_B5_CHART_LATENCY_Click Count`,
`2_Q_B5_CHART_ACTION`,
`3_Q_B5_loop-number` : `3_Q_B5_CHART_LATENCY_Click Count`,
`3_Q_B5_CHART_ACTION`,
`4_Q_B5_loop-number` : `4_Q_B5_CHART_LATENCY_Click Count`,
`4_Q_B5_CHART_ACTION`,
`1_Q_B6_loop-number` : `1_Q_B6_CHART_LATENCY_Click Count`,
`1_Q_B6_CHART_ACTION`,
`2_Q_B6_loop-number` : `2_Q_B6_CHART_LATENCY_Click Count`,
`2_Q_B6_CHART_ACTION`,
`3_Q_B6_loop-number` : `3_Q_B6_CHART_LATENCY_Click Count`,
`3_Q_B6_CHART_ACTION`,
`4_Q_B6_loop-number` : `4_Q_B6_CHART_LATENCY_Click Count`,
`4_Q_B6_CHART_ACTION`
#df_raw 937 vars
)
#DROP WIP DATAFRAMES
rm(df_raw_datacollar, df_raw_bluecollar, df_raw_b1, df_raw_b2, df_raw_b3, df_raw_b4, df_raw_b5, df_raw_b6, df_raw_pilot, df_raw_prolific, df_raw_tumblr, df_raw_tumblr_free, df_raw_tumblr_paid)
#2. CLEAN MASTER PARTICIPANT-LEVEL DF #########################################################
################################################################################################
#### MASTER WIDE FORMAT DATA FRAME [1 row / qualtrics submission] ################
df_data <- df_raw %>%
select(
-EndDate, -IPAddress, -RecordedDate,
-RecipientLastName, -RecipientFirstName, -RecipientEmail,
-ExternalReference, -LocationLatitude, -LocationLongitude,
-DistributionChannel, -UserLanguage, -Q_RecaptchaScore,
-BROWSER_Version, -BROWSER_Resolution,
-CONSENT, -ELIGIBILITY,
-randomize_common,
#hidden q that controls common stimulus url
-stimulus_common,
#not actually randomization order
-FL_14_DO,
-contains("First Click"), -contains("Last Click"), -contains("Click Count"),
-D_politicalParty_DO,
-ID_PROLIFIC, -ID_STUDY, -ID_SESSION #redundant to other cols
) %>%
rename(
duration.sec = `Duration (in seconds)`,
EndState = End_State,
TerminateFlag = Q_TerminateFlag,
Source = Status, #where the survey originated from (should not be preview or test)
PLATFORM = Q_PLATFORM,
ID.Qualtrics = ResponseId,
ID.Prolific = PROLIFIC_PID,
ID.Study = STUDY_ID,
ID.Session = SESSION_ID,
# P_BROWSER_OS = `P_BROWSER_Operating System`,
# T_BROWSER_OS = `T_BROWSER_Operating System`,
SCREEN_workFunction_TEXT = SCREEN_workFunction_22_TEXT,
SCREEN_socialMedia_TEXT = SCREEN_socialMedia_18_TEXT,
D_politicalParty_OTHER = D_politicalParty_4_TEXT,
D_politicsSocial = D_politicsSocial_1,
D_politicsFiscal = D_politicsFiscal_2
) %>%
mutate(
#SET FACTORS
D_politicsSocial = as.numeric(D_politicsSocial),
D_politicsFiscal = as.numeric(D_politicsFiscal),
ID.Study = factor(ID.Study),
ID.Qualtrics = factor(ID.Qualtrics),
ID.Prolific = factor(ID.Prolific),
ID.Session = factor(ID.Session),
PLATFORM = factor(PLATFORM),
Source = factor(Source),
Finished = as.logical(Finished),
TerminateFlag = factor(TerminateFlag),
EndState = factor(EndState),
D_gender = factor(D_gender),
D_age = factor(D_age),
D_income = factor(D_income,
levels = c(
"Prefer not to say",
"Less than $25,000",
"$25,000-$49,999" ,
"$50,000-$74,999" ,
"$75,000-$99,999" ,
"$100,000-$149,999",
"$150,000 or more"
)),
D_employmentStatus = factor(D_employmentStatus),
duration.sec = as.numeric(duration.sec), #weird booleans should only be for the test generator
duration.min = round(duration.sec/60,2),
Progress = as.numeric(Progress),
D_education = forcats::fct_na_value_to_level( D_education, level="NA"),
D_education = forcats::fct_collapse( D_education,
no_data = "NA",
less_high_school = c("Some high school or less"),
high_school = c("High school diploma or GED"),
some_college = c("Some college, but no degree", "Some college, no degree"),
associates =c( "Associates or technical degree"),
undergrad = c("Bachelor’s degree","Bachelor's degree"),
grad = c("Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS etc.)",
"Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS, etc)")
),
D_education = factor(D_education,
levels = c("no_data", "less_high_school","high_school", "some_college",
"associates", "undergrad", "grad"),
labels = c("NA", "some high school or less","high school diploma or GED ",
"some college", "associates or technical degree",
"undergradudate degree", "graduate or professional degree"),
),
D_politicalParty = factor(D_politicalParty, levels = c("No preference", "Other", "Independent", "Republican", "Democrat")),
D_age = factor(D_age,
levels = c("18-24 years old" ,
"25-34 years old" ,
"35-44 years old" ,
"45-54 years old" ,
"55-64 years old" ,
"65+ years old" ),
labels = c("18-24", "25-34","35-44","45-54","55-64","65+ years"))
) %>%
#REPLACE RANDOM TRAILING _1 AND _65 FROM QUALTRICS
rename_with( .cols = contains('_65'), .fn = ~str_replace(., pattern = '_65', replacement = '')) %>%
rename_with( .cols = contains('_1'), .fn = ~str_replace(., pattern = '_1', replacement = '')) %>%
#RM _PAGE Submit from LATENCY
rename_with( .cols = contains('_Page Submit'), .fn = ~str_replace(., pattern = '_Page Submit', replacement = '')) %>%
#CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES
rename_with( .cols = contains('_CHART_'), .fn = ~str_replace(., pattern = '_CHART_', replacement = '_CHART-')) %>%
#CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES
rename_with( .cols = contains('_MAKER_'), .fn = ~str_replace(., pattern = '_MAKER_', replacement = '_MAKER-')) %>%
#CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES
rename_with( .cols = contains('_AGE_'), .fn = ~str_replace(., pattern = '_AGE_', replacement = '_AGE-')) %>%
#CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES
rename_with (.cols = contains('_GENDER_'), .fn = ~str_replace(., pattern = '_GENDER_', replacement = '_GENDER-')) %>%
#CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES
rename_with (.cols = contains('_TOOL_'), .fn = ~str_replace(., pattern = '_TOOL_', replacement = '_TOOL-')) %>%
select(
#reordering
RANDOM_BLOCK:Progress,
Finished, EndState, TerminateFlag,
Q_RelevantIDDuplicate:Q_RelevantIDLastStartDate,
ID.Qualtrics,
ID.Prolific : ID.Session,
duration.sec, duration.min,
BROWSER_Browser : BROWSER_OS,
D_gender:D_politicsFiscal,
SCREEN_workMethod: SCREEN_socialMedia_TEXT,
PURPOSE, FEEDBACK, PLATFORM,
`0_Q_B0_ENCOUNTER`: `4_Q_B6_CHART-ACTION`
)
##### JOIN STUDY-LEVEL DATA
df_data <- dplyr::left_join(df_data, df_studies, by="ID.Study")
#SET IDS AND ASSIGNED BLOCK TO HANDLE PROLIFIC AND TUMBLR
df_data <- df_data %>%
mutate(
# SET ASSIGNMENT BLOCK FOR TUMBLR
Assigned.Block = if_else( (Distribution =="TUMBLR"), RANDOM_BLOCK, Assigned.Block),
Assigned.Block = factor(Assigned.Block),
# PID = if_else( (Distribution =="TUMBLR"), ID.Qualtrics, ID.Prolific),
PID = factor(ID.Qualtrics)
) %>% #DROP RANDOM_BLOC COLUMNS
select (-RANDOM_BLOCK) %>%
select(
#REORDER
PID,
Distribution,
Assigned.Block,
ID.Qualtrics:ID.Session,
EndState,
StartDate: Q_RelevantIDLastStartDate,
Prolific.Name, Qualtrics.Survey, Qualtrics.URL, Description, Sample, Scope,
duration.sec:PLATFORM,
`0_Q_B0_ENCOUNTER` : `4_Q_B6_CHART-ACTION`
)
#2B CLEAN MASTER PARTICIPANT-LEVEL DF #########################################
#### SEGREGATE PARTICIPANTS WHO DID NOT COMPLETE ###############################
## [1 row / qualtrics submission] ################
## NOTE it is common for prolific participants to fail the
## screening verification, and then try again but change their
## screening verification answers (ie. one prolific ID for multiple qualrics IDs)
df_exclude <- df_data %>%
filter(
!is.na(TerminateFlag) | Finished == FALSE | EndState != "COMPLETE"
) %>%
select(
PID, Distribution, ID.Qualtrics, ID.Prolific, ID.Study, Assigned.Block, Scope, Source, Progress,
Finished, TerminateFlag, EndState,StartDate, duration.min,
D_gender:D_politicsFiscal, SCREEN_workMethod:FEEDBACK, Prolific.Name:Scope
) %>%
mutate(
EndState = if_else( (Progress < 100), "abandoned", EndState),
EndState = if_else( (str_detect(EndState,"screen")), "screened", EndState),
EndState = if_else( (TerminateFlag == "Screened" & is.na(EndState)), "screened", EndState),
EndState = factor(EndState)
)
#### MASTER VALID DATA [WIDE] 1 row per qualtrics entry #########################
## [1 row / qualtrics submission] ################
df_data <- df_data %>% filter(
PID %nin% df_exclude$PID
# (Finished == TRUE ) & is.na(TerminateFlag)
) %>% mutate(
EndState = droplevels(EndState),
ID.Prolific = droplevels(ID.Prolific),
ID.Qualtrics = droplevels(ID.Qualtrics),
PID = droplevels((PID))
)
#sanity check === SHOULD BE O
#no qualtrics surveys in good data that weren't finished
print("Number of PID entries in df_data AND nofinish/excluded? [should be 0]")
## [1] "Number of PID entries in df_data AND nofinish/excluded? [should be 0]"
sum(df_data$PID %in% df_exclude$PID)
## [1] 0
#sanity check === SHOULD BE O
#no duplicated PROLIFIC ids in good data
print("Number of PIDs duplicated in df_data print [should be 0]")
## [1] "Number of PIDs duplicated in df_data print [should be 0]"
sum(duplicated(df_data$PID))
## [1] 0
## save participant-level data file for qda validation
## note that this DOES contain pilot data
## this does NOT contain excluded participants
write.csv(df_data, file = "data/output/df_participants.csv", na="")
#3 CREATE TRIAL LEVEL DFS FOR QDA ###############################################
#### CHART LEVEL DATA FRAME (LONG) FOR QDA (incl demographics) ##################
#### INCLUDES PILOT DATA FROM DATACOLLAR BLUECOLLAR PROLIFIC RECRUITMENT ########
# 1 ROW / participant X GRAPH including demographics
# UNRAVEL TO QUESTIONS
df_qda_input <- df_data %>%
# select(
# ID.Qualtrics:ID.Study, PLATFORM,
# contains("_Q_"), contains("loop")
# ) %>%
pivot_longer( #PIVOT ON stimulus
cols = contains("_Q_"),
names_to = c("stimulus","dummy","BLOCK","QUESTION"),
values_to = c("value"),
names_sep = "_"
) %>% select(-dummy) %>%
unite(
BLOCK:stimulus, col="STIMULUS", sep="-", remove=FALSE
) %>%
mutate(
BLOCK = factor(BLOCK),
STIMULUS = factor(STIMULUS),
QUESTION = str_replace_all(QUESTION,"-","_"),
QUESTION = factor(QUESTION),
STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
levels=c("0","4","3","2","1"),
labels= c("F","D","C","B","A"))
) %>%
select(-stimulus) %>%
# RE-RAVEL UP TO STIMULI
filter(!is.na(value)) %>%
pivot_wider(
names_from = QUESTION,
values_from = value
) %>%
tidyr::unnest() %>% # handle r coerces values to lists
mutate(
across(contains("MAKER_ID") | contains("MAKER_GENDER") | contains("MAKER_AGE"), factor),
across(contains("_CONF") | contains("_LATENCY"), as.numeric),
across(MAKER_DESIGN:MAKER_TRUST, as.numeric),
across(CHART_LIKE:CHART_TRUST, as.numeric),
ENCOUNTER = factor(ENCOUNTER),
# loop_number = as.numeric(loop_number),
# loop_number = ifelse(is.na(loop_number), 0, loop_number),
MAKER_LATENCY = round(MAKER_LATENCY/60,2), #CHANGE TO MINS
CHART_LATENCY = round(CHART_LATENCY/60,2) #CHANGE TO MINS
)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
#WRITE A CSV FILE AS THE BASIS FOR THE QUALITATIVE DATA ANALYSIS
write.csv(df_qda_input, file = "data/output/df_qda_input.csv", na="")
#### REMOVE PILOT DATA FROM DF_DATA
## pilot data IS not included QDA file, but not in quant analysis
df_data <- df_data %>%
filter(Scope != "pilot") %>%
mutate(
ID.Study = droplevels(ID.Study),
ID.Prolific = droplevels(ID.Prolific),
ID.Qualtrics = droplevels(ID.Qualtrics),
PID = droplevels(PID)
)
########## CHECK BLOCK COUNTS
# title = "Participants by Condition and Data Collection Modality"
# cols = c("Control Condition","Impasse Condition","Total for Period")
title = "Number of abandoned/screened/rejected attempts "
cols = c("Block","pilot","study2","sum")
cont <- table(df_exclude$Assigned.Block, df_exclude$Scope)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Block | pilot | study2 | sum |
|---|---|---|---|
| 1 | 107 | 31 | 138 |
| 2 | 0 | 32 | 32 |
| 3 | 0 | 36 | 36 |
| 4 | 0 | 28 | 28 |
| 5 | 0 | 17 | 17 |
| 6 | 0 | 25 | 25 |
| Sum | 107 | 169 | 276 |
title = "Number of abandoned/screened/rejected attempts by type"
cols = c("Rejection Type", "Sum")
cont <- table(df_exclude$EndState)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Rejection Type | Sum |
|---|---|
| abandoned | 249 |
| didnot-follow-instructions | 25 |
| failed_nonconsent | 5 |
| illegible-english | 2 |
| low-effort | 13 |
| low-quality | 1 |
| screened | 52 |
| Sum | 347 |
title = "Number of successful surveys"
cols = c("Assigned.Block", "Sum")
cont <- table(df_data$Assigned.Block)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Assigned.Block | Sum |
|---|---|
| 1 | 55 |
| 2 | 52 |
| 3 | 52 |
| 4 | 53 |
| 5 | 53 |
| 6 | 51 |
| Sum | 316 |
title = "Number of successful surveys by distribution"
cols = c("Sampling Platform", "Sum")
cont <- table(df_data$Distribution)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Sampling Platform | Sum |
|---|---|
| PROLIFIC | 240 |
| TUMBLR | 76 |
| Sum | 316 |
title = "Number of successful surveys by distribution and block"
cols = c("Sampling Platform", "Block-1","Block-2","Block-3","Block-4","Block-5","Block-6", "Sum")
cont <- table(df_data$Distribution, df_data$Assigned.Block)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Sampling Platform | Block-1 | Block-2 | Block-3 | Block-4 | Block-5 | Block-6 | Sum |
|---|---|---|---|---|---|---|---|
| PROLIFIC | 40 | 40 | 40 | 40 | 40 | 40 | 240 |
| TUMBLR | 15 | 12 | 12 | 13 | 13 | 11 | 76 |
| Sum | 55 | 52 | 52 | 53 | 53 | 51 | 316 |
##### CREATE PARTICIPANT LEVEL SIMPLFIFLIED DATAFRAME
df_participants <- df_data %>%
select(
PID:Assigned.Block,
EndState, Sample, Scope,
duration.sec, duration.min,
contains("D_"),
contains("SCREEN_")
)
# END WRANGLE MASTER WIDE PARTICIPANT LEVEL DATA FRAME
################################################################################################
################################################################################################
print("df_data represents full set of valid complete participants [wide]")
## [1] "df_data represents full set of valid complete participants [wide]"
print("df_participants represents full set of valid complete participants [simplified]")
## [1] "df_participants represents full set of valid complete participants [simplified]"
#4 CREATE QUESTION LEVEL DFS ###################################################################
#### QUESTION LEVEL DATA FRAME (LONG) ##########################
# unravel ALL the way down to questions
# 1 row per participant-graph-question
df_questions <- df_data %>%
select(
PID, duration.min, Assigned.Block,
Sample, Scope, Distribution, PLATFORM,
D_gender:D_politicsFiscal,
contains("_Q_"), contains("loop"),
) %>%
pivot_longer( #PIVOT ON stimulus
cols = contains("_Q_"),
names_to = c("stimulus","dummy","BLOCK","QUESTION"),
values_to = c("value"),
names_sep = "_"
) %>% select(-dummy) %>%
unite(
BLOCK:stimulus, col="STIMULUS", sep="-", remove=FALSE
) %>%
mutate(
BLOCK = factor(BLOCK),
STIMULUS = factor(STIMULUS),
QUESTION = str_replace_all(QUESTION,"-","_"),
QUESTION = factor(QUESTION,
levels = c(
"ENCOUNTER",
"MAKER_ID",
"MAKER_DETAIL",
"MAKER_CONF",
"MAKER_AGE",
"AGE_CONF",
"MAKER_GENDER",
"GENDER_CONF",
"MAKER_DESIGN",
"MAKER_DATA",
"MAKER_POLITIC",
"MAKER_ARGUE",
"MAKER_SELF",
"MAKER_ALIGN",
"MAKER_TRUST",
"MAKER_EXPLAIN",
"MAKER_LATENCY",
"TOOL_ID",
"TOOL_CONF",
"TOOL_DETAIL",
"CHART_LIKE",
"CHART_BEAUTY",
"CHART_INTENT",
"CHART_TRUST",
"CHART_TYPE",
"CHART_ACTION",
"CHART_EXPLAIN",
"CHART_LATENCY",
"loop_number" )),
STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
levels=c("0","4","3","2","1"),
labels= c("F","D","C","B","A"))
) %>%
select(-stimulus) %>% filter(!is.na(value))
#### SD QUESTION LEVEL DATA FRAME (wide-stim) ##########################
# ravel up one level from questions
# 1 row per participant-question with all blocks as cols for SD qs
df_sd_questions_wide <- df_questions %>%
select(-BLOCK,-STIMULUS_CATEGORY) %>% #drop block in order to work at stimulus level
filter(QUESTION %in% ref_sd_questions) %>%
pivot_wider(
names_from = STIMULUS,
values_from = value
) %>%
tidyr::unnest() %>% # handle r coerces values to lists
mutate(
across(contains("-") , as.numeric),
QUESTION = droplevels(QUESTION)
)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
### SD QUESTIONS LEVEL DATA FRAME (LONG) #####################
df_sd_questions_long <- df_questions %>%
filter(QUESTION %in% ref_sd_questions) %>%
mutate(
QUESTION = droplevels(QUESTION),
value = as.numeric(value)
)
#5 SPECIAL DFS FOR MULTISELECT QS #####################################################
#### MULTI-SELECT QUESTIONS ##########################
## DF_TOOLS
# 1 ROW per PARTICIPANT X STIMULUS X tool_id selection
# tool_id is a multiselect field
df_tools <- df_questions %>%
# select(ID.Prolific, QUESTION, value) %>%
filter(QUESTION %in% c("TOOL_ID", "TOOL_CONF")) %>%
mutate(QUESTION = fct_drop(QUESTION)) %>%
pivot_wider(
names_from = QUESTION,
values_from = value
) %>%
separate_longer_delim(
cols = TOOL_ID,
delim = ","
) %>%
mutate(TOOL_ID = factor(TOOL_ID,
levels = c("?", "design_basic","design_advanced", "viz_basic", "viz_advanced", "programming")),
PID = droplevels(PID),
TOOL_CONF = as.numeric(TOOL_CONF)
)
## DF_ACTIONS
# 1 ROW per PARTICIPANT X STIMULUS X chart_action selection
# chart_action is a multiselect field
df_actions <- df_questions %>%
filter(QUESTION == "CHART_ACTION") %>%
mutate(QUESTION = fct_drop(QUESTION)) %>%
separate_longer_delim(
cols = value,
delim = ","
) %>%
mutate(CHART_ACTION = factor(value,
levels = c("NOTHING — just keep scrolling",
"unfollow / block the source",
"post a comment",
"share / repost",
"share / repost WITH comment",
"look up more information about the topic or source"),
labels = c("nothing",
"unfollow/block",
"comment",
"share",
"share w/ comment",
"seek information")),
PID = droplevels(PID)) %>%
select(-value,-QUESTION)
#6 DFS FOR TRIAL LEVEL ANALYSIS ######################################################
#### CHART LEVEL DATA FRAME (LONG) #####################################################
# roll partway back up from questions
# 1 row per participant X graph
# unnest https://stackoverflow.com/questions/58035452/pivot-wider-outputs-s3-vctrs-list-of-objects
df_graphs_full <- df_questions %>%
pivot_wider(
names_from = QUESTION,
values_from = value
) %>%
tidyr::unnest() %>% # handle r coerces values to lists
mutate(
across(contains("MAKER_ID") | contains("MAKER_GENDER") | contains("MAKER_AGE"), factor),
across(contains("_CONF") | contains("_LATENCY"), as.numeric),
across(MAKER_DESIGN:MAKER_TRUST, as.numeric),
across(CHART_LIKE:CHART_TRUST, as.numeric),
ENCOUNTER = factor(ENCOUNTER),
# loop_number = as.numeric(loop_number),
# loop_number = ifelse(is.na(loop_number), 0, loop_number),
MAKER_LATENCY = round(MAKER_LATENCY/60,2), #CHANGE TO MINS
CHART_LATENCY = round(CHART_LATENCY/60,2), #CHANGE TO MINS
MAKER_ID = factor( MAKER_ID,levels = c("business", "political", "education","news","organization","individual"))
)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
#7 DFS FOR TRIAL LEVEL ANALYSIS w/o free response ######################################################
#### CHART LEVEL DATA FRAME (LONG) ##########################
## SUBSET OF COLUMNS EXCLUDING THE FREE RESPONSES and MULTISELECT
df_graphs <- df_graphs_full %>%
select( !where(is.character))
print("df_graphs_full is trial level dataset; df_graphs is trial level quantitative data only")
## [1] "df_graphs_full is trial level dataset; df_graphs is trial level quantitative data only"
## CALCULATE AV CONFIDENCE FOR EACH SEMANTIC DIFFERENTIALS
df_sds <- df_sd_questions_long %>%
group_by(PID, QUESTION) %>%
summarize(
mean = mean(value),
sd = sd (value)
)
## `summarise()` has grouped output by 'PID'. You can override using the `.groups`
## argument.
df_sds_WIDE <- df_sds %>%
pivot_wider(
names_from = QUESTION,
values_from = c(mean, sd)
)
## CALCULATE AV CONFIDENCE ACROSS SEMANTIC DIFFERENTIALS
df_grandsds <- df_sd_questions_long %>%
group_by(PID) %>%
summarize(
SDIFF_mean = mean(value),
SDIFF_sd = sd(value)
)
df_grandsds_LONG <- df_grandsds %>%
mutate(
QUESTION = "SDIFF",
mean = SDIFF_mean,
sd = SDIFF_sd
) %>% select(-SDIFF_mean, -SDIFF_sd)
## CALCULATE AV CONFIDENCE FOR EACH CONFIDENCE
df_confq <- df_questions %>%
filter(QUESTION %in% ref_conf_questions)%>%
group_by(PID, QUESTION) %>%
summarize(
mean = mean(as.numeric(value)),
sd = sd (as.numeric(value))
)
## `summarise()` has grouped output by 'PID'. You can override using the `.groups`
## argument.
df_confq_WIDE <- df_confq %>%
pivot_wider(
names_from = QUESTION,
values_from = c(mean, sd)
)
## CALCULATE AV CONFIDENCE ACROSS SEMANTIC DIFFERENTIALS
df_grandconfs <- df_questions %>%
filter(QUESTION %in% ref_conf_questions)%>%
group_by(PID) %>%
summarize(
CONF_mean = mean(as.numeric(value)),
CONF_sd = sd(as.numeric(value))
)
df_grandconfs_LONG <- df_grandconfs %>%
mutate(
QUESTION = "CONF",
mean = CONF_mean,
sd = CONF_sd
) %>% select(-CONF_mean, -CONF_sd)
## JOIN TOGETHER WIDE
df_confidence_wide <- left_join(df_sds_WIDE, df_confq_WIDE, by="PID")
df_confidence_wide <- left_join(df_confidence_wide, df_grandsds, by = "PID")
df_confidence_wide <- left_join(df_confidence_wide, df_grandconfs, by = "PID")
df_confidence_wide <- left_join(df_confidence_wide, df_participants, by = "PID")
##JOIN TOGETHER LONG
df_confidence_long <- rbind(df_sds, df_confq, df_grandsds_LONG, df_grandconfs_LONG) %>%
mutate(
QUESTION = factor(QUESTION)
)
df_confidence_long <- left_join(df_confidence_long, df_participants, by="PID")
rm(df_sds, df_grandsds, df_confq, df_grandconfs, df_sds_WIDE, df_grandsds_LONG, df_confq_WIDE, df_grandconfs_LONG)
print("df_confidence_wide is a participant level dataset with by question and question type mean and df per participant")
## [1] "df_confidence_wide is a participant level dataset with by question and question type mean and df per participant"
print("df_confidence_long is a participant level dataset with by question and question type mean and df per participant")
## [1] "df_confidence_long is a participant level dataset with by question and question type mean and df per participant"
print("In both cases the means are over all 5 stimuli seen by each participant")
## [1] "In both cases the means are over all 5 stimuli seen by each participant"
#8 TRIAL LEVEL DATA INCLUDING QDA CODING ############################################################
#### CHART LEVEL DATA FRAME (LONG) ##########################
# ### TODO PICK UP HERE
#
#
# # import coded QDA data
# # participant X chart level
# df_coded <- read_csv("data/input/QDA/s2_qda_maker.csv", col_names = TRUE)
# # create unique key for join
# df_coded <- df_coded %>% filter(
# Scope != "pilot" #filter out pilot data
# ) %>% mutate(
# TRIAL = paste0(STIMULUS,"_",ID.Prolific)
# ) %>% select(
# #TODO ADD CODED DATA THEY ARE ADDED
# TRIAL, CODE_M_ID_SPECIFIC
# )
#
# # create base dataframe
# df_graphs_coded <- df_graphs_full %>% mutate(
# TRIAL = paste0(STIMULUS,"_",ID.Prolific)
# )
#
# ##SANITY CHECK
# print("coded df & trials df have same num rows")
# nrow(df_coded) == nrow(df_graphs_coded)
#
# df_graphs_coded <- dplyr::left_join(df_graphs_coded, df_coded, by="TRIAL") %>%
# select(
# #reorder cols
# duration.min:D_politicsFiscal, BLOCK:MAKER_ID, MAKER_CONF:CHART_ACTION, STIMULUS, ID.Prolific, TRIAL, MAKER_DETAIL, CODE_M_ID_SPECIFIC
# )
#
# ##SANITY CHECK
# print("coded df & trials df have same num rows")
# nrow(df_graphs) == nrow(df_graphs_coded)
#
# #####################################################################################
# ################ SANITY CHECKS ######################################################
# #CHECK BLOCKS PER PARTICIPANT
# # every participant should have 2 blocks
# check_subject_blocks <- df_questions %>%
# group_by(ID.Prolific) %>% summarise(
# n_block = length(unique(BLOCK))
# )
# print("every participant should have two blocks, [block 0 + randomly assigned block]")
# all(check_subject_blocks$n_block==2)
#
#
# #CHECK STIMULI PER PARTICIPANT
# # every participant should have 5 graphs
# check_subject_graphs <- df_questions %>%
# group_by(ID.Prolific) %>%summarise(
# n_graphs = length(unique(STIMULUS))
# ) #each participant should have five graphs
# print("every participant should have five stimuli [B0 + 4 stimuli in a block]]")
# all(check_subject_graphs$n_graphs==5)
#
#
#
#
#
#
#
#
# #CLEANUP
# rm(check_subject_blocks, check_subject_graphs, df_graphs_qda, df_coded)
############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (df, left, right, x, y, color) {
# g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
g <- ggplot(df, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
############## RETURNS SINGLE SD
## LOOP STYLE
single_sd <- function (df, left, right, x) {
g <- ggplot(df, aes(y = {{x}}, x = ""))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
######### RETURNS SINGLE SD
## APPLY STYLE
plot_sd = function (data, column, type, split, boxplot) {
ggplot(df, aes(y = .data[[column]], x="")) +
{if(boxplot) geom_boxplot(width = 0.5) } +
geom_jitter(width = 0.1, alpha=0.3, {if(split) aes(color=Distribution)}) +
{if(split) facet_grid(Distribution ~ .)} +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
{if(type == "S")
guides(
y = guide_axis_manual(labels = ref_labels[column,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
)} +
{if(type == "Q")
guides(
y = guide_axis_manual(labels = ref_labels[q,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
)} +
theme_minimal() +
labs (
caption = column
)
}
dfSummary(df_data %>% select(PID, duration.min, Distribution, Assigned.Block, PLATFORM,
contains("D_"), Prolific.Name:Scope, contains("SCREEN_")),
headings = TRUE,
plain.ascii = FALSE,
style = 'grid',
graph.magnif = 0.85,
varnumbers = FALSE,
valid.col = FALSE,
tmp.img.dir = "/tmp")
Dimensions: 316 x 29
Duplicates: 0
| Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing |
|---|---|---|---|---|
| PID [factor] |
1. R_11XUYk3OpC3HJPM 2. R_133GgyRbPsqfxPD 3. R_1A0uAWls6AhmTfr 4. R_1aUA7MBz4XYLbkl 5. R_1azgETg5gjYGzUf 6. R_1Bc6WwJjiC9VBVn 7. R_1Cq1wvlMciQUHh8 8. R_1D2krAejiiSE3Ln 9. R_1dz1Y8zwrsbEbB6 10. R_1EnkVRz2u8SWbla [ 306 others ] |
1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 1 ( 0.3%) 306 (96.8%) |
0 (0.0%) |
|
| duration.min [numeric] |
Mean (sd) : 44.7 (25.7) min < med < max: 10.9 < 38.3 < 227.6 IQR (CV) : 25.8 (0.6) |
303 distinct values | 0 (0.0%) |
|
| Distribution [factor] |
1. PROLIFIC 2. TUMBLR |
240 (75.9%) 76 (24.1%) |
0 (0.0%) |
|
| Assigned.Block [factor] |
1. 1 2. 2 3. 3 4. 4 5. 5 6. 6 |
55 (17.4%) 52 (16.5%) 52 (16.5%) 53 (16.8%) 53 (16.8%) 51 (16.1%) |
0 (0.0%) |
|
| PLATFORM [factor] |
1. Facebook 2. Instagram 3. LinkedIn 4. Tumblr 5. Twitter/X |
73 (23.1%) 93 (29.4%) 8 ( 2.5%) 73 (23.1%) 69 (21.8%) |
0 (0.0%) |
|
| D_gender [factor] |
1. Female 2. Male 3. Non-binary / third gender 4. Prefer not to say 5. Prefer to self-describe |
157 (49.7%) 106 (33.5%) 37 (11.7%) 3 ( 0.9%) 13 ( 4.1%) |
0 (0.0%) |
|
| D_gender_4_TEXT [character] |
1. genderqueer 2. Agender 3. gender fluid 4. genderqueer (dislike term 5. genderqueer trans man 6. Genderqueer wlw 7. genderqueer woman 8. queer 9. She/They 10. Trans male [ 2 others ] |
2 (15.4%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 1 ( 7.7%) 2 (15.4%) |
303 (95.9%) |
|
| D_race [character] |
1. White or Caucasian 2. Asian 3. Black or African American 4. Other 5. White or Caucasian,Asian 6. White or Caucasian,Black 7. White or Caucasian,Other 8. Prefer not to say 9. White or Caucasian,Americ 10. American Indian/Native Am [ 4 others ] |
211 (66.8%) 34 (10.8%) 34 (10.8%) 11 ( 3.5%) 5 ( 1.6%) 4 ( 1.3%) 4 ( 1.3%) 3 ( 0.9%) 3 ( 0.9%) 2 ( 0.6%) 5 ( 1.6%) |
0 (0.0%) |
|
| D_education [factor] |
1. NA 2. some high school or less 3. high school diploma or GE 4. some college 5. associates or technical d 6. undergradudate degree 7. graduate or professional |
0 ( 0.0%) 4 ( 1.3%) 28 ( 8.9%) 63 (19.9%) 33 (10.4%) 133 (42.1%) 55 (17.4%) |
0 (0.0%) |
|
| D_employmentStatus [factor] |
1. A homemaker or stay-at-ho 2. Other 3. Retired 4. Student 5. Unemployed and looking fo 6. Working full-time 7. Working part-time |
13 ( 4.1%) 11 ( 3.5%) 5 ( 1.6%) 30 ( 9.5%) 50 (15.8%) 156 (49.4%) 51 (16.1%) |
0 (0.0%) |
|
| D_income [factor] |
1. Prefer not to say 2. Less than $25,000 3. $25,000-$49,999 4. $50,000-$74,999 5. $75,000-$99,999 6. $100,000-$149,999 7. $150,000 or more |
14 ( 4.4%) 58 (18.4%) 64 (20.3%) 72 (22.8%) 37 (11.7%) 47 (14.9%) 24 ( 7.6%) |
0 (0.0%) |
|
| D_work_detail [character] |
1. Unemployed 2. N/A 3. Student 4. homemaker 5. IT 6. Manager 7. Artist 8. Cashier 9. Consultant 10. Disabled [ 269 others ] |
9 ( 2.9%) 5 ( 1.6%) 5 ( 1.6%) 4 ( 1.3%) 3 ( 1.0%) 3 ( 1.0%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 277 (88.2%) |
2 (0.6%) |
|
| D_zipcode [character] |
1. 30019 2. 11105 3. 20149 4. 27858 5. 28645 6. 37917 7. 60615 8. 77062 9. 85210 10. 95123 [ 293 others ] |
3 ( 1.0%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 2 ( 0.6%) 294 (93.3%) |
1 (0.3%) |
|
| D_age [factor] |
1. 18-24 2. 25-34 3. 35-44 4. 45-54 5. 55-64 6. 65+ years |
67 (21.2%) 127 (40.2%) 59 (18.7%) 46 (14.6%) 14 ( 4.4%) 3 ( 0.9%) |
0 (0.0%) |
|
| D_politicalParty [factor] |
1. No preference 2. Other 3. Independent 4. Republican 5. Democrat |
12 ( 3.8%) 28 ( 8.9%) 87 (27.5%) 27 ( 8.5%) 162 (51.3%) |
0 (0.0%) |
|
| D_politicalParty_OTHER [character] |
1. Socialist 2. leftist 3. socialist 4. Liberal 5. Progressive 6. A progressive who must vo 7. Anarchist 8. Freedom and peace party 9. Green 10. I don’t like any [ 9 others ] |
4 (14.3%) 3 (10.7%) 3 (10.7%) 2 ( 7.1%) 2 ( 7.1%) 1 ( 3.6%) 1 ( 3.6%) 1 ( 3.6%) 1 ( 3.6%) 1 ( 3.6%) 9 (32.1%) |
288 (91.1%) |
|
| D_politicsSocial [numeric] |
Mean (sd) : 26.7 (26.5) min < med < max: 0 < 20 < 100 IQR (CV) : 42.2 (1) |
65 distinct values | 0 (0.0%) |
|
| D_politicsFiscal [numeric] |
Mean (sd) : 33.9 (28.4) min < med < max: 0 < 30.5 < 100 IQR (CV) : 43 (0.8) |
76 distinct values | 0 (0.0%) |
|
| Prolific.Name [factor] |
1. MAG_S2_PROLIFIC_BLUECOLLA 2. MAG_S2_PROLIFIC_DATACOLLA 3. MAG_S2_PROLIFIC_GENERAL_B 4. MAG_S2_PROLIFIC_GENERAL_B 5. MAG_S2_PROLIFIC_GENERAL_B 6. MAG_S2_PROLIFIC_GENERAL_B 7. MAG_S2_PROLIFIC_GENERAL_B 8. MAG_S2_PROLIFIC_GENERAL_B 9. MAG_S2_PROLIFIC_GENERAL_B 10. MAG_S2_PROLIFIC_GENERAL_B [ 4 others ] |
0 ( 0.0%) 0 ( 0.0%) 40 (12.7%) 20 ( 6.3%) 20 ( 6.3%) 18 ( 5.7%) 22 ( 7.0%) 21 ( 6.6%) 19 ( 6.0%) 19 ( 6.0%) 137 (43.4%) |
0 (0.0%) |
|
| Qualtrics.Survey [factor] |
1. MAG_S2_PROLIFIC_GENERAL_1 2. MAG_S2_PROLIFIC_GENERAL_2 3. MAG_S2_PROLIFIC_GENERAL_3 4. MAG_S2_PROLIFIC_GENERAL_4 5. MAG_S2_PROLIFIC_GENERAL_5 6. MAG_S2_PROLIFIC_GENERAL_6 7. MAG_S2_PROLIFIC-BLUECOLLA 8. MAG_S2_PROLIFIC-DATACOLLA 9. MAG_S2_TUMBLR_FREE 10. MAG_S2_TUMBLR_PAID |
40 (12.7%) 40 (12.7%) 40 (12.7%) 40 (12.7%) 40 (12.7%) 40 (12.7%) 0 ( 0.0%) 0 ( 0.0%) 13 ( 4.1%) 63 (19.9%) |
0 (0.0%) |
|
| Qualtrics.URL [factor] |
1. https://mit.co1.qualtrics\ 2. https://mit.co1.qualtrics\ 3. https://mit.co1.qualtrics\ 4. https://mit.co1.qualtrics\ 5. https://mit.co1.qualtrics\ 6. https://mit.co1.qualtrics\ 7. https://mit.co1.qualtrics\ 8. https://mit.co1.qualtrics\ 9. https://mit.co1.qualtrics\ 10. https://mit.co1.qualtrics | 40 (12.7%) 63 (19.9%) 40 (12.7%) 13 ( 4.1%) 40 (12.7%) 40 (12.7%) 0 ( 0.0%) 0 ( 0.0%) 40 (12.7%) 40 (12.7%) |
0 (0.0%) |
|
| Description [character] |
1. TUMBLR-paid 2. block1-full 3. block3-fill 4. block4-20 5. block5-fill 6. block6-fill 7. block2-10 8. block2-fill 9. block4-fill 10. block5-20 [ 3 others ] |
63 (19.9%) 40 (12.7%) 22 ( 7.0%) 21 ( 6.6%) 21 ( 6.6%) 21 ( 6.6%) 20 ( 6.3%) 20 ( 6.3%) 19 ( 6.0%) 19 ( 6.0%) 50 (15.8%) |
0 (0.0%) |
|
| Sample [factor] |
1. blue-collar 2. data-collar 3. general-prolific 4. tumblr-free 5. tumblr-paid |
0 ( 0.0%) 0 ( 0.0%) 240 (75.9%) 13 ( 4.1%) 63 (19.9%) |
0 (0.0%) |
|
| Scope [factor] |
1. pilot 2. study2 |
0 ( 0.0%) 316 (100.0%) |
0 (0.0%) |
|
| SCREEN_workMethod [character] |
1. btwn-50-75 2. btwn25-50 3. less-25 4. more-75 |
49 (15.5%) 18 ( 5.7%) 43 (13.6%) 206 (65.2%) |
0 (0.0%) |
|
| SCREEN_workFunction [character] |
1. other 2. Operations 3. Education-Professional 4. IT 5. Research 6. Administration-PersonalAs 7. Healthcare-Professional 8. Design-Creative 9. Sales-Business-Developmen 10. Data-Analysis [ 65 others ] |
72 (22.8%) 24 ( 7.6%) 20 ( 6.3%) 18 ( 5.7%) 18 ( 5.7%) 15 ( 4.7%) 15 ( 4.7%) 12 ( 3.8%) 12 ( 3.8%) 10 ( 3.2%) 100 (31.6%) |
0 (0.0%) |
|
| SCREEN_workFunction_TEXT [character] |
1. N/A 2. Student 3. Unemployed 4. n/a 5. unemployed 6. Logistics 7. Not applicable 8. Alternative Health 9. Business services 10. Business support [ 45 others ] |
7 ( 9.5%) 5 ( 6.8%) 4 ( 5.4%) 3 ( 4.1%) 3 ( 4.1%) 2 ( 2.7%) 2 ( 2.7%) 1 ( 1.4%) 1 ( 1.4%) 1 ( 1.4%) 45 (60.8%) |
242 (76.6%) |
|
| SCREEN_socialMedia [character] |
1. Twitter,Reddit,LinkedIn 2. Facebook,Instagram,Reddit 3. Facebook,Twitter,Instagra 4. Facebook,Twitter,Youtube, 5. Facebook,Instagram,Reddit 6. Facebook,Twitter,Youtube, 7. Facebook,Twitter,Reddit,L 8. Facebook,Youtube,Reddit,L 9. Facebook,Instagram 10. Facebook,LinkedIn [ 183 others ] |
8 ( 2.5%) 7 ( 2.2%) 7 ( 2.2%) 7 ( 2.2%) 6 ( 1.9%) 6 ( 1.9%) 5 ( 1.6%) 5 ( 1.6%) 4 ( 1.3%) 4 ( 1.3%) 257 (81.3%) |
0 (0.0%) |
|
| SCREEN_socialMedia_TEXT [character] |
1. Discord 2. Bluesky 3. 4chan 4. alt tech, to avoid exactl 5. discord 6. Livejournal 7. mastedons,telegram 8. Monster, GlassDoor 9. nextdoor 10. Pinterest [ 4 others ] |
4 (22.2%) 2 (11.1%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 1 ( 5.6%) 4 (22.2%) |
298 (94.3%) |
print("Explore shapes of the prepared dataframes")
## [1] "Explore shapes of the prepared dataframes"
qacBase::df_plot(df_graphs)
qacBase::df_plot(df_questions)
qacBase::df_plot(df_sd_questions_long)
qacBase::df_plot(df_sd_questions_wide)
qacBase::barcharts(df_participants)
## The following variable had more than 20 levels and were not graphed:
## PID D_work_detail D_zipcode SCREEN_workFunction SCREEN_workFunction_TEXT SCREEN_socialMedia
df <- df_data
## BOXPLOT — SURVEY RESPONSE TIME
ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=duration.min, color=Assigned.Block))+
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
facet_grid(Distribution ~.) +
coord_flip() +
labs( y = "Survey Response Time (mins)", x="",
title = "TOTAL Response Time by Sample",
subtitle = "(distributions of response times are similar across samples, as expected)") +
theme_minimal() + theme(legend.position = "none")
## RIDGEPLOT — SURVEY RESPONSE TIME
ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
geom_density_ridges(scale=0.8) +
# geom_boxplot()+
stat_pointinterval()+
theme_ridges() +
scale_fill_discrete(direction=-1)+
facet_grid(Distribution ~.) +
theme_minimal() +
theme(legend.position = "none") +
labs( x = "Survey Response Time (mins)", y="",
title = "TOTAL Response Time by Sample",
subtitle = "(distributions of response times are similar across samples, as expected)")
## Picking joint bandwidth of 7.37
## Picking joint bandwidth of 11
desc.duration <- psych::describe(df_data$duration.min)
## TODO TUMBLR AS WELL
Participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 44.67 minutes, SD = 25.66.
df <- df_graphs
## BOXPLOT — MAKER RESPONSE TIME
ggplot(df, aes(x=STIMULUS_CATEGORY, y=MAKER_LATENCY, color=STIMULUS_CATEGORY))+
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(jitter.width=1.5), alpha = 0.3) +
facet_grid(Distribution ~ .)+
scale_color_viridis(discrete=TRUE, option="viridis") +
coord_flip() +
labs( y = "MAKER Page Response Time (mins)", x="",
title = "MAKER Page Response Time by Stimulus Category",
subtitle = "(distributions of response times are similar across samples, as expected)") +
theme_minimal() + theme(legend.position = "none")
## RIDGEPLOT — MAKER PAGE RESPONSE
ggplot(df, aes(x = MAKER_LATENCY, y = STIMULUS_CATEGORY, fill = STIMULUS_CATEGORY)) +
geom_density_ridges(scale=0.8) +
# geom_boxplot()+
stat_pointinterval()+
# theme_ridges() +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis") +
theme_minimal() +
theme(legend.position = "none") +
labs( x = "MAKER Page Response Time (mins)", y="",
title = "MAKER Page Response Time by Stimulus Category",
subtitle = "(distributions of response times are similar across samples, as expected)")
## Picking joint bandwidth of 0.383
## Picking joint bandwidth of 0.756
Maker page response times ranged from 0.37 to 39.52 minutes, with a mean response time of 3 minutes.
df <- df_graphs
## BOXPLOT — MAKER RESPONSE TIME
ggplot(df, aes(x=STIMULUS_CATEGORY, y=CHART_LATENCY, color=STIMULUS_CATEGORY))+
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(jitter.width=1.5), alpha = 0.3) +
scale_color_viridis(discrete=TRUE, option="viridis") +
facet_grid(Distribution ~ .) +
coord_flip() +
labs( y = "CHART Page Response Time (mins)", x="",
title = "CHART Page Response Time by Stimulus Category",
subtitle = "(distributions of response times are similar across samples, as expected)") +
theme_minimal() + theme(legend.position = "none")
## RIDGEPLOT — MAKER PAGE RESPONSE
ggplot(df, aes(x = CHART_LATENCY, y = STIMULUS_CATEGORY, fill = STIMULUS_CATEGORY)) +
geom_density_ridges(scale=0.8) +
# geom_boxplot()+
stat_pointinterval()+
# theme_ridges() +
scale_fill_viridis(discrete=TRUE, option="viridis") +
facet_grid(Distribution ~ .) +
theme_minimal() +
theme(legend.position = "none") +
labs( x = "CHART Page Response Time (mins)", y="",
title = "CHART Page Response Time by Stimulus Category",
subtitle = "(distributions of response times are similar across samples, as expected)")
## Picking joint bandwidth of 0.246
## Picking joint bandwidth of 0.337
Chart page response times ranged from 0.19 to 20.21 minutes, with a mean response time of 1.7 minutes.
# AGE by SAMPLE
ggplot(data = df_data, aes( x = Assigned.Block, fill = fct_rev(D_age) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
labs( title = "AGE by Sample", subtitle = "Expect similiar across samples", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
theme_minimal() +
easy_add_legend_title("")
# OVERALL AGE
ggstatsplot::ggbarstats(df_data, x= D_age, y=Distribution) +
theme_minimal() +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
easy_add_legend_title("Age") +
labs (title = "Participant Age")
# GENDER by SAMPLE
ggplot(data = df_data, aes( x = Assigned.Block, fill = fct_rev(D_gender) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
labs( title = "Gender by Sample", subtitle = "Expect similiar across samples", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
theme_minimal()
# easy_add_legend_title("")
# OVERALL AGE
ggstatsplot::ggbarstats(df_data, x= D_gender, y=Distribution) +
theme_minimal() +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
easy_add_legend_title("Gender") +
labs (title = "Participant Gender")
#PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)
#TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)
title = "Participant Gender — Self Describe"
cols = c("Text","Count")
cont <- table(df_data$D_gender_4_TEXT)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Text | Count |
|---|---|
| Agender | 1 |
| gender fluid | 1 |
| genderqueer | 2 |
| genderqueer (dislike term non-binary) | 1 |
| genderqueer trans man | 1 |
| Genderqueer wlw | 1 |
| genderqueer woman | 1 |
| queer | 1 |
| She/They | 1 |
| Trans male | 1 |
| Transgender male | 1 |
| transmasc | 1 |
| Sum | 13 |
240 individuals from Prolific participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).
Note that a higher proportion of participants recruited from Tumblr represent identities other than cis-gender Female and cis-gender Male. 76 individuals from Tumblr participated in Study 2, ( 37% Female, 5% Male, 39% Non-binary, 18% Other).
df <- df_data
# EDUCATION by SAMPLE
ggplot(data = df, aes( x = Assigned.Block, fill = D_education )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .)+
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( title = "EDUCATION by Sample",
subtitle = "Expect similiar across samples") +
theme_minimal() +
easy_add_legend_title("Education")
# INCOME BY EDUCATION
ggstatsplot::ggbarstats(df_data, x= D_education, y=D_income) +
theme_minimal() +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs (title = "INCOME by EDUCATION")
df <- df_graphs
# FISCAL POLITICAL SAMPLE
leftside <- rep("left-leaning", length(ref_blocks))
rightside <- rep("right-leaning", length(ref_blocks))
g <- ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=D_politicsFiscal, color=Assigned.Block)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
facet_grid(Distribution ~ .)+
labs( title = "FISCAL VALUES by Sample",
subtitle = "(expect similar values across samples)",
y = "Fiscal Politics", x = "") +
# easy_add_legend_title("Sample") +
theme_minimal() +
coord_flip()
g + guides(
y = guide_axis_manual(
breaks = ref_blocks,
labels = leftside
),
y.sec = guide_axis_manual(
breaks = ref_blocks,
labels = rightside
))
# SOCIAL POLITICAL SAMPLE
leftside <- rep("left-leaning", length(ref_blocks))
rightside <- rep("right-leaning", length(ref_blocks))
g <- ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=D_politicsSocial, color=Assigned.Block)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
facet_grid(Distribution ~ .)+
labs( title = "SOCIAL VALUES by Sample",
subtitle = "(expect similar values across samples)",
y = "Social Politics", x = "") +
# easy_add_legend_title("Sample") +
theme_minimal() +
coord_flip()
g + guides(
y = guide_axis_manual(
breaks = ref_blocks,
labels = leftside
),
y.sec = guide_axis_manual(
breaks = ref_blocks,
labels = rightside
))
r_politics <- nrow(df_data %>% filter(D_politicsSocial >= 50)) / nrow(df_data)
l_politics <- nrow(df_data %>% filter(D_politicsSocial < 50)) / nrow(df_data)
r_fiscal <- nrow(df_data %>% filter(D_politicsFiscal >= 50)) / nrow(df_data)
l_fiscal <- nrow(df_data %>% filter(D_politicsFiscal < 50)) / nrow(df_data)
rm(g)
#PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.fiscal.p <- psych::describe(df.p$D_politicsFiscal)
desc.social.p <- psych::describe(df.p$D_politicsSocial)
#TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.fiscal.t <- psych::describe(df.t$D_politicsFiscal)
desc.social.t <- psych::describe(df.t$D_politicsSocial)
For the 240 participants recruited from Prolific, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 32.27, SD = 27.49. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 39.4, SD = 29.06.
For the 76 participants recruited from Tumblr, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 50, with a mean value of 9.24, SD = 11.16. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 91, with a mean value of 16.41, SD = 17.06.
Overall, 77.85% of respondents identify with left-leaning social values (vs) 22.15% identifying as right-leaning; while 66.14% of respondents reported left-leaning fiscal values (vs) 33.86% identify as right-leaning.
df <- df_graphs %>% select(PID, Assigned.Block, Distribution, D_politicsSocial, D_politicsFiscal, D_politicalParty) %>%
mutate(
d_social = D_politicsSocial,
d_fiscal = D_politicsFiscal
)
ggplot(df, aes(x = d_social, y = d_fiscal, color = D_politicalParty)) +
geom_point() +
geom_hline(yintercept = 50) +
geom_vline(xintercept = 50) +
facet_grid(Distribution~D_politicalParty)+
labs(
title = "Social and Fiscal Political Values by Political Party Affiliation",
x = "Social Values", y = "Fiscal Values"
)+
theme_minimal() +
easy_remove_legend() +
easy_remove_axes()
# POLITICAL PARTY
ggplot(data = df_data, aes( fill = D_politicalParty, x = Assigned.Block )) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( title = "POLITICAL PARTY by Sample",
subtitle = "Expect similiar across samples", x = "") +
theme_minimal() +
easy_add_legend_title("")
#PLATFORM CHOICE
ggplot( df_data, aes( x = Assigned.Block, fill = PLATFORM)) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
labs( title = "PLATFORM CHOICE by Sample",
subtitle = "Expect similiar across samples", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
easy_add_legend_title("") +
theme_minimal()
How confident are participants in their attributions? We want to explore this from several perspectives, including: participant-level differences, question-level differences, and stimulus-level differences
## QUESTION LEVEL CONFIDENCE
df <- df_questions %>%
filter(QUESTION %in% ref_conf_questions) %>%
mutate(value= as.numeric(value))
(g <- ggplot(df, aes(x = value, fill = QUESTION)) +
geom_histogram() +
facet_wrap(. ~ QUESTION) + #, scales = "free", space = "free", drop = TRUE
theme_minimal() +
labs(title = "Distrubtion of Confidence by Question") +
easy_remove_legend())
ggsave(plot = g, path="figs/histograms", filename =paste0("confidence_","histograms.png"), units = c("in"))
## QUESTION LEVEL CONFIDENCE
df <- df_questions %>%
filter(QUESTION %in% ref_sd_questions) %>%
mutate(value= as.numeric(value))
(g <- ggplot(df, aes(x = value, fill = QUESTION)) +
geom_histogram() +
facet_grid( STIMULUS~ QUESTION) +
theme_minimal() +
labs(title = "Distrubtion of SEMANTIC DIFFERENTIAL SCALES by Question") +
easy_remove_legend())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggsave(plot = g, path="figs/histograms", filename =paste0("sd_scales_by_question_stimulus","histograms.png"), units = c("in"), width = 20, height = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## QUESTION LEVEL CONFIDENCE
df <- df_questions %>%
filter(QUESTION %in% ref_sd_questions) %>%
mutate(value= as.numeric(value))
(g <- ggplot(df, aes(x = value, fill = STIMULUS_CATEGORY)) +
geom_histogram() +
facet_grid( STIMULUS_CATEGORY~ QUESTION) +
theme_minimal() +
labs(title = "Distrubtion of SEMANTIC DIFFERENTIAL SCALES by Question") +
easy_remove_legend())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggsave(plot = g, path="figs/histograms", filename =paste0("sd_scales_by_question_category","histograms.png"), units = c("in"), width = 20, height = 20)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## STIMULUS LEVEL CONFIDENCE
df <- df_questions %>%
filter(QUESTION %in% ref_conf_questions) %>%
mutate(value= as.numeric(value))
(g <- ggplot(df, aes(x = value, fill = fct_rev(STIMULUS_CATEGORY))) +
geom_histogram() +
facet_grid(rows = vars(BLOCK),
cols = vars(fct_rev(STIMULUS_CATEGORY)), scales = "free_y", space = "free", drop = TRUE) +
theme_minimal()+
labs(title = "Distrubtion of Confidence by Stimulus"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggsave(plot = g, path="figs/histograms", filename =paste0("confidence_matrix_","histograms.png"), units = c("in"), width = 20, height = 20)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
df <- df_questions %>%
filter(QUESTION %in% c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")) %>%
mutate(value= as.numeric(value)) %>%
group_by(PID, QUESTION) %>%
summarise(
mean_conf = mean(value),
sd_conf = sd(value),
var_conf = var(value)
)
ggplot(df, aes( x = mean_conf)) +
geom_histogram() +
facet_wrap( ~ QUESTION) +
labs(
title = "Mean Participant confidence by Question",
subtitle = "(averaging across all 5 stimuli per participant)"
) +
theme_minimal()
ggplot(df, aes( x = sd_conf)) +
geom_histogram() +
facet_wrap( ~ QUESTION) +
labs(
title = "Standard Deviation of Participant confidence by Question",
subtitle = "(averaging across all 5 stimuli per participant)"
) +
theme_minimal()
## BY PARTICIPANT
df <- df_questions %>%
filter(QUESTION %in% c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")) %>%
mutate(value= as.numeric(value)) %>%
group_by(PID) %>%
summarise(
mean_conf = mean(value),
sd_conf = sd(value),
var_conf = var(value)
)
ggplot(df, aes( x = mean_conf)) +
geom_histogram(binwidth = 2) +
labs(
title = "Mean Participant Confidence ",
subtitle = "(averaging across 4 confidence questions and all 5 stimuli per participant)"
) +
theme_minimal()
ggplot(df, aes( x = sd_conf)) +
geom_histogram(binwidth = 2) +
xlim(0,100)+
labs(
title = "Standard Deviation of Participant Confidence ",
subtitle = "(averaging across 4 confidence questions and all 5 stimuli per participant)"
) +
theme_minimal()
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
geom_bar(position = "fill") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
labs( title = "MAKER ID by Stimulus (grouped by CATEGORY) ",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
geom_bar(position = "fill") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") +
labs( title = "MAKER ID by Stimulus (grouped by CATEGORY) ",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_ID)) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
labs( title = "MAKER ID by Stimulus Category",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_ID)) +
geom_bar(position = "fill") +
labs( title = "MAKER ID by Stimulus Category",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
#FILTER DATAFRAME
df <- df_graphs
######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
# geom_bar(width = 0.5, position = position_dodge(0.5)) +
geom_bar(position = "fill", width = 0.8) +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# MAKER_CONFIDENCE by IDENTIFICATION
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = MAKER_CONF, color = MAKER_ID)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.4) +
scale_color_viridis(discrete=TRUE, option="viridis") +
labs (title = "") +
labs (x = "") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER ID & CONFIDENCE by Stimulus',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
#FILTER DATAFRAME
df <- df_graphs
######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_ID)) +
geom_bar(position = "fill") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# MAKER_CONFIDENCE by IDENTIFICATION
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = MAKER_CONF, color = MAKER_ID)) +
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_viridis(discrete=TRUE, option="viridis") +
# labs (title = "MAKER-ID-CONFIDENCE") +
labs (x = "STIMULUS CATEGORY") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER ID & CONFIDENCE',
subtitle = '',
caption = ''))
rm(a,b,p)
###################################################
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_AGE)) +
geom_bar(position = "fill") +
#SCALES & SPACE FREE SUPPRESSES EMPTY ROWS
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
labs( title = "MAKER AGE by Stimulus (grouped by CATEGORY) ",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
# easy_add_legend_title("") +
theme_minimal()
df <- df_graphs
#MAKER AGE
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_AGE)) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
labs( title = "MAKER AGE by Stimulus Category",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid(rows= vars(D_age)) +
coord_flip()+
theme_minimal()
#FILTER DATAFRAME
df <- df_graphs
######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_AGE)) +
geom_bar(position = "fill", width = 0.8) +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# MAKER_CONFIDENCE by IDENTIFICATION
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = MAKER_CONF, color = MAKER_AGE)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_viridis(discrete=TRUE, option="viridis") +
labs (title = "") +
labs (x = "STIMULUS CATEGORY") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER AGE & CONFIDENCE by Category',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
#FILTER DATAFRAME
df <- df_graphs
######### MAKER ID AND CONFIDENCE ##############
# MAKER_AGE by Sample and BEHAVIOR
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_AGE)) +
geom_bar(position = "fill") + #dodge
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# MAKER_CONFIDENCE by IDENTIFICATION
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = AGE_CONF, color = MAKER_AGE)) +
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_viridis(discrete=TRUE, option="viridis") +
labs (x = "STIMULUS CATEGORY") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER AGE & CONFIDENCE by Category',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_GENDER)) +
geom_bar(position = "fill") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
labs( title = "MAKER GENDER by Stimulus (grouped by CATEGORY) ",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
coord_flip()+
# easy_add_legend_title("") +
theme_minimal()
### Maker Gender by Category
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_GENDER)) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .)+
labs( title = "MAKER GENDER by Stimulus Category",
subtitle = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid(rows= vars(D_age)) +
coord_flip()+
theme_minimal()
#FILTER DATAFRAME
df <- df_graphs
######### MAKER ID AND CONFIDENCE ##############
# MAKER_GENDER by STIMULUS
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_GENDER)) +
geom_bar(position = "fill", width = 0.8) +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# GENDER_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = GENDER_CONF, color = MAKER_GENDER)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_viridis(discrete=TRUE, option="viridis") +
labs (x = "STIMULUS") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER GENDER & CONFIDENCE by Stimulus',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
#FILTER DATAFRAME
df <- df_graphs
######### MAKER GENDER AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_GENDER)) +
geom_bar(position = "fill") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
theme_minimal()
# GENDER_CONFIDENCE by GENDER
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = GENDER_CONF, color = MAKER_GENDER)) +
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_viridis(discrete=TRUE, option="viridis") +
labs (x = "STIMULUS CATEGORY") +
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'MAKER GENDER & CONFIDENCE by Category',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
df <- df_tools
# TOOL CHOICE BY STIMULUS
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
geom_bar(position = "fill") +
coord_flip() +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_y_continuous(labels = scales::percent) +
labs( title = "TOOL ID by Stimulus (grouped by Category)",
subtitle = "", x = "") +
easy_add_legend_title("TOOL_ID") +
theme_minimal()
df <- df_tools
# TOOL CHOICE BY STIMULUS
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = STIMULUS_CATEGORY )) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
coord_flip() +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_y_continuous(labels = scales::percent) +
labs( title = "TOOL ID by Category",
subtitle = "", x = "") +
easy_add_legend_title("TOOL_ID") +
theme_minimal()
#FILTER DATAFRAME
df <- df_tools
######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = fct_rev(TOOL_ID))) +
geom_bar(position = "fill", width = 0.8) +
scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
# TOOL_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = TOOL_CONF, color = fct_rev(TOOL_ID))) +
geom_boxplot(position=position_dodge(0.9), width = 0.6)+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_paletteer_d("awtools::a_palette", direction = 1) +
labs (x = "STIMULUS") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'TOOL ID & CONFIDENCE by Stimulus',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
#FILTER DATAFRAME
df <- df_tools
######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = fct_rev(TOOL_ID))) +
geom_bar(position = "fill") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
# TOOL CONF
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = TOOL_CONF, color = fct_rev(TOOL_ID))) +
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_paletteer_d("awtools::a_palette", direction = 1) +
labs (x = "STIMULUS CATEGORY") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
(p <- (a / b ) + plot_annotation(
title = 'TOOL ID & CONFIDENCE by Category',
subtitle = '',
caption = ''))
###################################################
rm(a,b,p)
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = ENCOUNTER)) +
geom_bar(position = "fill") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
labs( title = "MAKER ENCOUNTER by Stimulus (grouped by CATEGORY) ",
subtitle = "", x = "") +
scale_fill_brewer(palette = "Dark2") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
coord_flip()+
# easy_add_legend_title("") +
theme_minimal()
df <- df_graphs
#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = ENCOUNTER)) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
labs( title = "ENCOUNTER by Stimulus Category (grouped by CATEGORY)",
subtitle = "", x = "") +
scale_fill_brewer(palette = "Dark2") +
coord_flip()+
theme_minimal()
df <- df_actions
# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
geom_bar(position = "fill") +
coord_flip() +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_y_continuous(labels = scales::percent) +
labs( title = "Chart Action by Stimulus (grouped by Category)",
subtitle = "", x = "") +
easy_add_legend_title("ACTION") +
theme_minimal()
df <- df_actions
# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = STIMULUS_CATEGORY )) +
geom_bar(position = "fill") +
facet_grid(Distribution ~ .) +
coord_flip() +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_y_continuous(labels = scales::percent) +
labs( title = "CHART ACTION by Category",
subtitle = "", x = "") +
easy_add_legend_title("ACTION") +
theme_minimal()
df <- df_graphs
qacBase::qstats(df, CHART_BEAUTY, STIMULUS) %>% arrange(mean)
## STIMULUS n mean sd
## 1 B5-1 53 27.23 22.54
## 2 B3-1 52 28.56 22.96
## 3 B4-2 53 28.77 26.02
## 4 B3-3 52 31.54 27.19
## 5 B2-2 52 31.73 26.11
## 6 B1-3 55 38.13 32.33
## 7 B6-4 51 40.24 29.85
## 8 B1-4 55 45.18 28.97
## 9 B6-2 51 45.22 27.18
## 10 B1-2 55 46.09 25.28
## 11 B5-2 53 49.17 22.59
## 12 B0-0 316 49.62 27.02
## 13 B5-3 53 50.19 27.54
## 14 B4-1 53 53.75 27.75
## 15 B2-1 52 55.13 25.80
## 16 B1-1 55 55.75 29.37
## 17 B6-1 51 56.02 24.26
## 18 B3-2 52 57.25 25.32
## 19 B5-4 53 57.81 26.61
## 20 B2-4 52 61.62 26.60
## 21 B4-4 53 61.79 28.38
## 22 B6-3 51 62.39 27.72
## 23 B3-4 52 63.60 27.31
## 24 B4-3 53 66.60 21.00
## 25 B2-3 52 74.23 24.85
qacBase::qstats(df, CHART_BEAUTY, STIMULUS_CATEGORY) %>% arrange(mean)
## STIMULUS_CATEGORY n mean sd
## 1 B 316 43.03 27.10
## 2 A 316 46.10 28.49
## 3 F 316 49.62 27.02
## 4 C 316 53.70 30.89
## 5 D 316 55.02 29.17
#
#
# ggplot(df, aes(x = CHART_BEAUTY)) +
# geom_histogram(bins = 20) +
# facet_grid(BLOCK ~ STIMULUS_CATEGORY, scales = "free", space = "free", drop=TRUE) +
# theme_minimal()
#
#
# ggplot(df, aes(x = CHART_BEAUTY)) +
# geom_histogram(bins = 20) +
# facet_grid( ~ fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free", drop=TRUE) +
# theme_minimal()
#
##ggstatsplot BY CATEGORY
grouped_gghistostats(
data = df_graphs %>% filter(STIMULUS != "B0-0"),
x = CHART_BEAUTY, ## same outcome variable
grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
type = "robust", ## robust test: one-sample percentile bootstrap
test.value = 50, ## test value against which sample mean is to be compared
centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
# ggtheme = ggthemes::theme_stata(), ## changing default theme
## turn off ggstatsplot theme layer
## arguments relevant for combine_plots
annotation.args = list(
title = "DISTRIBUTION of Chart Beauty by Cateogry",
caption = ""
),
plotgrid.args = list(nrow = 6)
)
#B0-0
gghistostats(
data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
x = CHART_BEAUTY, ## numeric variable
xlab = "CHART BEAUTY", ## x-axis label
title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
test.value = 50, ## test value
caption = ""
)
df <- df_graphs
qacBase::qstats(df, CHART_TRUST, STIMULUS) %>% arrange(mean)
## STIMULUS n mean sd
## 1 B1-3 55 30.98 27.33
## 2 B3-3 52 39.92 27.16
## 3 B1-4 55 45.45 23.37
## 4 B2-4 52 47.94 26.74
## 5 B5-3 53 49.15 24.83
## 6 B4-2 53 50.62 21.28
## 7 B0-0 316 50.80 20.10
## 8 B6-4 51 52.14 22.33
## 9 B5-4 53 53.68 21.04
## 10 B6-3 51 54.24 21.75
## 11 B2-2 52 54.52 22.15
## 12 B4-4 53 55.94 24.65
## 13 B6-1 51 56.78 19.09
## 14 B1-2 55 56.98 22.19
## 15 B6-2 51 57.39 22.10
## 16 B1-1 55 58.04 22.54
## 17 B3-2 52 58.52 18.56
## 18 B3-4 52 59.00 20.68
## 19 B5-1 53 61.77 23.68
## 20 B5-2 53 62.36 20.57
## 21 B2-1 52 62.44 20.45
## 22 B3-1 52 63.31 16.86
## 23 B2-3 52 66.00 22.10
## 24 B4-1 53 66.28 23.03
## 25 B4-3 53 72.49 20.13
qacBase::qstats(df, CHART_TRUST, STIMULUS_CATEGORY) %>% arrange(mean)
## STIMULUS_CATEGORY n mean sd
## 1 F 316 50.80 20.10
## 2 C 316 51.98 27.87
## 3 D 316 52.31 23.51
## 4 B 316 56.73 21.32
## 5 A 316 61.44 21.19
##ggstatsplot BY CATEGORY
grouped_gghistostats(
data = df_graphs %>% filter(STIMULUS != "B0-0"),
x = CHART_TRUST, ## same outcome variable
grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
type = "robust", ## robust test: one-sample percentile bootstrap
test.value = 50, ## test value against which sample mean is to be compared
centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
# ggtheme = ggthemes::theme_stata(), ## changing default theme
## turn off ggstatsplot theme layer
## arguments relevant for combine_plots
annotation.args = list(
title = "DISTRIBUTION of Chart TRUST by Cateogry",
caption = ""
),
plotgrid.args = list(nrow = 6)
)
#B0-0
gghistostats(
data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
x = CHART_TRUST, ## numeric variable
xlab = "CHART TRUST", ## x-axis label
title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
test.value = 50, ## test value
caption = ""
)
# #FILTER DATAFRAME
# df <- df_graphs_full %>%
# filter(STIMULUS== "B0-0") %>%
# select(
# Distribution, D_politicalParty,
# # D_gender, D_race, D_education, D_income, D_age,
# # ENCOUNTER,
# MAKER_ID,
# D_politicsSocial, D_politicsFiscal,
# MAKER_DESIGN, MAKER_DATA, MAKER_CONF, MAKER_ALIGN, MAKER_TRUST, CHART_TRUST
# # :MAKER_TRUST,
# # TOOL_CONF,CHART_LIKE:CHART_TRUST
# )
#
# # g <- ggpairs(df, color=MAKER_ID)
# # ggsave(plot = g, path="figs/pairplots", filename =paste0("B0-0","_pairplot.svg"), units = c("in"), width = 20, height = 20 )
# # ggplotly(g)
#
#
#
#
# (boo_pair <- ggpairs(df, aes(color= MAKER_ID)) +
# scale_color_viridis(discrete=TRUE, option="viridis")+
# scale_fill_viridis(discrete=TRUE, option="viridis") +
# theme_minimal()
# )
#
#
# (boo_duo <- ggduo(df, aes(color= Distribution)) +
# scale_color_viridis(discrete=TRUE, option="viridis")+
# scale_fill_viridis(discrete=TRUE, option="viridis") +
# theme_minimal()
# )
#
#ggplotly(boo_duo) #surprisingly works! but not helpful
# df <- df_graphs %>% filter(STIMULUS == "B0-0") %>%
# mutate(
# flipped_data = abs(MAKER_DATA - 100),
# flipped_design = abs(MAKER_DESIGN -100),
# flipped_intent = abs(CHART_INTENT -100),
# flipped_self = abs(MAKER_SELF -100)
# ) %>% select(
# flipped_data:flipped_self, MAKER_ARGUE, MAKER_ALIGN, CHART_LIKE, CHART_BEAUTY, CHART_TRUST, MAKER_TRUST
# )
#
#
# m.1 <- lm(CHART_TRUST ~ MAKER_TRUST, data = df )
# summ(m.1)
#
# m.2 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN , data = df )
# summ(m.2)
#
# m.3 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY , data = df )
# summ(m.3)
#
# m.4 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent, data = df )
# summ(m.4)
#
# m.5 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent + flipped_data, data = df )
# summ(m.5)
#
# m <- lm(CHART_TRUST ~ MAKER_ALIGN, data = df)
# summ(m)
#
#
# compare_performance(m.1, m.2, m.3, m.4, m.5, m, rank = TRUE)
#
# ## WHAT PREDICTS MAKER_TRUST?
#
#
# ## PLOT THE DATA
# ggplot(df, aes(x = MAKER_ALIGN, y = MAKER_TRUST)) +
# geom_point() +
# stat_smooth(method = "lm",
# formula = y ~ x,
# geom = "smooth") +
# labs(title = "MAKER ALIGNMENT PREDICTS TRUST?")+
# theme_minimal()
#
#
# ## PLOT THE DATA
# ggplot(df, aes(x = flipped_data, y = MAKER_TRUST)) +
# geom_point() +
# stat_smooth(method = "lm",
# formula = y ~ x,
# geom = "smooth") +
# labs(title = "MAKER DATA PREDICTS TRUST?")+
# theme_minimal()
#
#
# ## BUILD MODEL
# m.1 <- lm( MAKER_TRUST ~ MAKER_ALIGN, data = df)
# # summary(m.1)
# jtools::summ(m.1, confint = TRUE)
# # check_model(m.1)
# # report(m.1)
#
# ## BUILD MODEL
# m.2 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data, data = df)
# # summary(m.2)
# jtools::summ(m.2, confint = TRUE)
# # check_model(m.2)
# # report(m.2)
#
# ## BUILD MODEL
# m.3 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design , data = df)
# # summary(m.3)
# jtools::summ(m.3, confint = TRUE)
# # check_model(m.3)
# # report(m.3)
#
#
# ## BUILD MODEL
# m.4 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design + MAKER_POLITIC, data = df)
# # summary(m.3)
# jtools::summ(m.4, confint = TRUE)
# # check_model(m.3)
# # report(m.3)
#
#
#
#
# compare_performance(m.1, m.2, m.3, m.4, m.B, rank = TRUE)
#
#
# # effect_plot(m.2, pred=MAKER_DATA, rug = TRUE, plot.points = TRUE) +
# # xlim(0,100) +
# # ylim(0,100)
#
#
#
#
# df <- df_graphs %>% filter(STIMULUS != "B0-0") %>%
# select(PID, STIMULUS,
# MAKER_ID:MAKER_TRUST, CHART_LIKE:CHART_TRUST,
# D_politicsFiscal, D_politicsSocial)
#
# m.pid <- lmer( MAKER_TRUST ~ (1 | PID) , data = df, REML = FALSE)
# m.stim <- lmer( MAKER_TRUST ~ (1|STIMULUS) , data = df, REML = FALSE)
# m.r <- lmer( MAKER_TRUST ~ (1 | PID) + (1|STIMULUS), data = df, REML = FALSE)
#
# compare_performance (m.pid, m.stim, m.r, rank = TRUE)
#
#
#
# m.1 <- update( m.r, .~. + CHART_BEAUTY)
# summ(m.1)
#
# m.2 = update(m.1, . ~ . + CHART_LIKE)
# summ(m.2)
#
#
# m.3 = update(m.2, . ~ . + CHART_TRUST)
# summ(m.3)
#
#
# compare_performance(m.r, m.1, m.2, m.3, rank = TRUE)
df <- df_graphs_full %>%
filter(STIMULUS== "B0-0")
p <- ggplot(df, aes(x = MAKER_POLITIC, y = MAKER_CONF,
color = MAKER_ID ,
text = paste0("MAKER-DETAIL: ",MAKER_DETAIL, "<br>","ID: ", PID, "MAKER_EXPLAIN", MAKER_EXPLAIN)))+
scale_color_viridis(discrete=TRUE, option="viridis") +
geom_point(size=0.5) +
xlim(0,100)+
ylim(0,100)+
facet_grid(D_politicalParty ~ MAKER_ID) +
theme_minimal() +
labs(
title = "B0-0 | MAKER Identification, Politics and Confidence",
subtitle = "\n by Participant Confidence and Political Party",
x = "MAKER POLITICS",
y = "MAKER ID CONFIDENCE"
)
ggplotly(p)
# df <- df_confidence_long %>% filter(QUESTION %nin% c("CONF", "SDIFF")) ##AVERAGE CONFIDENCE
df <- df_graphs_full %>% filter(STIMULUS== "B0-0")
ggplot(df, aes(x = MAKER_CONF, y = MAKER_TRUST, color = MAKER_ID)) +
geom_point() +
scale_color_viridis(discrete=TRUE, option="viridis") +
facet_grid(BLOCK ~ STIMULUS_CATEGORY) +
theme_minimal() +
easy_remove_legend()
ggplot(df, aes(x = MAKER_POLITIC, y = MAKER_TRUST, color = MAKER_ID)) +
geom_point() +
scale_color_viridis(discrete=TRUE, option="viridis") +
facet_grid(BLOCK ~ STIMULUS_CATEGORY) +
theme_minimal() +
easy_remove_legend()
df <- df_graphs %>% select(
STIMULUS_CATEGORY, MAKER_ID, MAKER_DESIGN, MAKER_DATA, MAKER_POLITIC, MAKER_ARGUE, MAKER_ALIGN, MAKER_TRUST, CHART_TRUST, CHART_BEAUTY
)
#maker pair plot
x <- ggduo(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("maker_category","_duoplot.png"), units = c("in"), width = 20, height = 20 )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#maker pair plot
x <- ggpairs(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("maker_category","_pairplot.png"), units = c("in"), width = 20, height = 20 )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
df <- df_graphs %>% select(
STIMULUS_CATEGORY, MAKER_ID, CHART_LIKE, CHART_BEAUTY, CHART_INTENT, CHART_TRUST, MAKER_TRUST
)
#maker pair plot
x <- ggduo(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("chart_category","_duoplot.png"), units = c("in"), width = 20, height = 20 )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#maker pair plot
x <- ggpairs(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("chart_category","_pairplot.png"), units = c("in"), width = 20, height = 20 )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
df <- df_graphs %>% select(STIMULUS_CATEGORY, where(is.numeric)) %>%
select(-duration.min)
#maker pair plot
x <- ggscatmat(df, color = "STIMULUS_CATEGORY")
## Warning in ggscatmat(df, color = "STIMULUS_CATEGORY"): Factor variables are
## omitted in plot
ggsave(plot = x, path="figs/pairplots", filename =paste0("category","_scaplot.png"), units = c("in"), width = 20, height = 20 )
#SETUP LISTS
stim <- ref_stimuli
plots_maker_politics <- htmltools::tagList()
### MAKE PLOTS
# LOOP THROUGH STIMULI
i = 0
for (s in stim){
i = i+1 #iterator hack
d <- df_graphs_full %>% filter(STIMULUS == s) %>% select(PID, STIMULUS,
MAKER_ID, MAKER_DETAIL, MAKER_CONF, MAKER_POLITIC,
D_politicalParty, D_politicsSocial, D_politicsFiscal)
p <- ggplot(d, aes(x = MAKER_POLITIC, y = MAKER_CONF,
color = MAKER_ID ,
text = paste0("MAKER-DETAIL: ",MAKER_DETAIL, "<br>","ID: ", PID)))+
geom_point() +
xlim(0,100)+
ylim(0,100)+
facet_grid(MAKER_ID ~ D_politicalParty) +
theme_minimal() +
labs(
title = paste(s," | ", "MAKER Identification, Politics and Confidence, by Participant Confidence and Political Affiliation"),
x = "MAKER POLITICS",
y = "MAKER CONFIDENCE"
)
plots_maker_politics[[i]] <- ggplotly(p)
}
names(plots_maker_politics) <- ref_stimuli
#works in console but not render
### PRINT PLOTS
# for (s in stim){
# print(plots_maker_politics[[s]])
# }
# ggplotly(p) #works for single
plots_maker_politics
# df <- df_graphs_coded %>% select( ID.Prolific, STIMULUS, BLOCK, STIMULUS_CATEGORY, MAKER_ID, MAKER_DETAIL, CODE_M_ID_SPECIFIC) %>%
# separate_longer_delim(
# cols = CODE_M_ID_SPECIFIC,
# delim = ","
# ) %>% filter(
# CODE_M_ID_SPECIFIC %nin% c("x", "X","example")
# ) %>% mutate(
# CODED_MAKER = str_trim(CODE_M_ID_SPECIFIC, side="left"),
# CODED_MAKER = str_to_upper(CODED_MAKER),
# CODED_MAKER = factor(CODED_MAKER, levels = c( "NYT",
# "WASHINGTON POST" ,
# "USA TODAY" ,
# "THE ECONOMIST" ,
# "WSJ",
# "POPULAR SCIENCE" ,
# "TIME",
# "NPR",
# "PBS",
# "ASSOCIATED PRESS" ,
# "BBC",
# "CBS NEWS" ,
# "ABC NEWS" ,
# "NBC",
# "CNN",
# "FOX NEWS" ,
# "BUZZFEED",
# "VOX",
# "VICE",
# "HUFFINGTON POST" ,
# "US DOD" ,
# "US BUREAU OF ECONOMIC ANALYSIS",
# "US DOE" ,
# "US CDC" ,
# "MINISTRY OF HEALTH" ,
# "EPA",
# "NOAA",
# "NATIONAL WEATHER SERVICE" ,
# "WHO",
# "UN",
# "THE NATURE CONSERVANCY" ,
# "GREENPEACE",
# "LEAF",
# "HARVARD",
# "COLUMBIA UNIVERSITY" ,
# "IAA",
# "IBM",
# "MICROSOFT",
# "GOLDMAN SACHS" ,
# "GMC",
# "TESLA",
# "GREEN PARTY OF AMERICA" ,
# "THE DEMOCRATIC PARTY" ,
# "THE REPUBLICAN PARTY")),
# STIM = factor(STIMULUS, levels = c(
# "B1-1", "B2-1", "B3-1", "B4-1", "B5-1", "B6-1",
# "B1-2", "B2-2", "B3-2", "B4-2", "B5-2", "B6-2",
# "B1-3", "B2-3", "B3-3", "B4-3", "B5-3", "B6-3",
# "B1-4","B2-4" ,"B3-4" ,"B4-4" ,"B5-4" ,"B6-4", "B0-0"
# ))
# )
#
#
#
# ggplot(df, aes(x = MAKER_ID, fill = CODED_MAKER )) +
# geom_bar(position = "stack") +
# facet_grid( rows = vars(BLOCK), cols = vars(fct_rev(STIMULUS_CATEGORY))) +
# theme_minimal() +
# labs(title = "Specific makers identified by stimulus")
#
#
# ggplot(df, aes(x = STIMULUS, fill = CODED_MAKER )) +
# geom_bar(position = "stack") +
# theme_minimal()
#
#
# ggplot(df, aes(x = STIMULUS, fill = CODED_MAKER )) +
# geom_bar(position = "stack") +
# facet_wrap(~CODED_MAKER) +
# theme_minimal() +
# easy_remove_x_axis() +
# easy_remove_legend()
#
#
# ggplot(df, aes(x = CODE_M_ID_SPECIFIC, fill = STIM, )) +
# geom_bar(position = "stack") +
# coord_flip() +
# theme_minimal()
#
##STATSPLOT
# grouped_ggwithinstats(
# data = df_graphs,
# x = STIMULUS,
# y = CHART_TRUST,
# grouping.var = ID.Study
# )
# grouped_ggscatterstats(
# data = df_graphs, ## data frame from which variables are taken
# x = CHART_BEAUTY, ## predictor/independent variable
# y = CHART_TRUST, ## dependent variable
# grouping.var = ID.Study,
# xlab = "CHART BEAUTY", ## label for the x-axis
# ylab = "CHART TRUST", ## label for the y-axis
# # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
# point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
# xfill = "#CC79A7", ## fill for marginals on the x-axis
# yfill = "#009E73" ## fill for marginals on the y-axis
# # title = "CHART TRUST (VS) MAKER TRUST",
# # caption = ""
# )
#
# gf_point( data = df_graphs, CHART_TRUST~MAKER_TRUST, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
#
#
# grouped_ggscatterstats(
# data = df_graphs, ## data frame from which variables are taken
# x = MAKER_TRUST, ## predictor/independent variable
# y = CHART_TRUST, ## dependent variable
# grouping.var = ID.Study,
# xlab = "MAKER TRUST", ## label for the x-axis
# ylab = "CHART TRUST", ## label for the y-axis
# # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
# point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
# xfill = "#CC79A7", ## fill for marginals on the x-axis
# yfill = "#009E73" ## fill for marginals on the y-axis
# # title = "CHART TRUST (VS) MAKER TRUST",
# # caption = ""
# )
#
# gf_point( data = df_graphs, MAKER_TRUST~CHART_TRUST, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
#
#
#
# grouped_ggscatterstats(
# data = df_graphs, ## data frame from which variables are taken
# x = CHART_BEAUTY, ## predictor/independent variable
# y = MAKER_ALIGN, ## dependent variable
# grouping.var = ID.Study,
# xlab = "CHART BEAUTY", ## label for the x-axis
# ylab = "MAKER ALIGN", ## label for the y-axis
# # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
# point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
# xfill = "#CC79A7", ## fill for marginals on the x-axis
# yfill = "#009E73" ## fill for marginals on the y-axis
# # title = "CHART TRUST (VS) MAKER TRUST",
# # caption = ""
# )
#
#
# gf_point( data = df_graphs, MAKER_ALIGN~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
#
#
#
#
#
#
# ggscatterstats(
# data = df_graphs %>% filter(STIMULUS=="B0-0"), ## data frame from which variables are taken
# x = CHART_BEAUTY, ## predictor/independent variable
# y = MAKER_TRUST, ## dependent variable
# xlab = "CHART BEAUTY", ## label for the x-axis
# ylab = "MAKER TRUST", ## label for the y-axis
# # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
# point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
# xfill = "#CC79A7", ## fill for marginals on the x-axis
# yfill = "#009E73" ## fill for marginals on the y-axis
# # title = "CHART TRUST (VS) MAKER TRUST",
# # caption = ""
# )
#
# gf_point( data = df_graphs %>% filter(STIMULUS=="B0-0"), CHART_TRUST~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
#
# gf_point( data = df_graphs %>% filter(STIMULUS=="B0-0"), MAKER_TRUST~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
#
#
# #TODO CENTERING AND NORMALIZING
#
# ### little model
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
# m <- lm( CHART_TRUST ~ CHART_BEAUTY,data = df)
# summary(m)
# ggnostic(m) #GGALLY MODEL CHECKS
# check_model(m) #EASY STATS MODEL CHECKS
#
# report(m)
### MORE COMPLEX MODEL
# df <- df_graphs %>% filter(STIMULUS != "B0-0")
# m1 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1|ID.Qualtrics), data = df)
# m2 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1|STIMULUS), data = df)
# m3 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1 | STIMULUS) + (1 | ID.Qualtrics), data = df)
# summary(m1)
# summary(m2)
# summary(m3)
# compare_parameters(m1,m2,m3)
# compare_performance(m1,m2,m3, rank = TRUE)
#
#
# m <- lmer(CHART_TRUST ~ CHART_BEAUTY + MAKER_TRUST + (1 | STIMULUS) , data = df)
# mi <- lmer(CHART_TRUST ~ CHART_BEAUTY * MAKER_TRUST + (1 | STIMULUS) , data = df)
# summary(m)
# summary(mi)
#
# compare_performance(m3,m,mi, rank = TRUE)
#
# report(m)
#todo see https://yury-zablotski.netlify.app/post/mixed-effects-models-2/ about fitting with ML vs REML for model comparison
# #TODO WALK THROUGH THIS
# #https://yury-zablotski.netlify.app/post/mixed-models/#multiple-random-slope-model
#
# #load example data
# data("sleepstudy")
#
# #fit the model
# m_slp <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
#
# #the next line put all the estimated intercept and slope per subject into a dataframe
# reaction_slp <- as.data.frame(t(apply(ranef(m_slp)$Subject, 1,function(x) fixef(m_slp) + x)))
#
# #to get the predicted regression lines we need one further step, writing the linear equation: Intercept + Slope*Days with different coefficient for each subject
# pred_slp <- melt(apply(reaction_slp,1,function(x) x[1] + x[2]*0:9), value.name = "Reaction")
#
# #some re-formatting for the plot
# names(pred_slp)[1:2] <- c("Days","Subject")
# pred_slp$Days <- pred_slp$Days - 1
# pred_slp$Subject <- as.factor(pred_slp$Subject)
#
# #plot with actual data
# ggplot(pred_slp,aes(x=Days,y=Reaction,color=Subject))+
# geom_line()+
# geom_point(data=sleepstudy,aes(x=Days,y=Reaction))+
# facet_wrap(~Subject,nrow=3)
NOTE: the following blocks run if the graph_render var
is set to TRUE in the header block. Graphs are generated
(takes a long time) and written to the figs folder. Only do this data
are are added or excluded
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
#################### ALL QUESTIONS AT STIMULUS #####################
# ONE PLOT FOR EACH STIMULUS WITH ALL QUESTIONS
#set stimuli to be graphed
stimuli <- ref_stimuli #created in wrangling block
# stimili <- c("B0-0","B2-1" ,"B2-2", "B2-3", "B2-4")
box_stimuli <- list()
rain_stimuli <- list()
i = 0
for (s in stimuli){
i = i+1
# setup dataframe
title <- df_stimuli %>% filter(ID == s) %>% select(NAME)
title <- paste(s,"|",title)
df <- df_graphs %>% filter(STIMULUS== s)
#subset data cols
cols <- df %>% select( all_of(ref_sd_questions))
plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", split = FALSE, boxplot=TRUE))
#aggregate q plots into one for stimulus
x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
plot_annotation(
title = title,
subtitle =""
)
box_stimuli[[i]] <- x
ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_box.png"), units = c("in"), width = 10, height = 14 )
#### RAINCLOUD PLOT
# setup dataframe
title <- df_stimuli %>% filter(ID == s) %>% select(NAME)
title <- paste(s,"|",title)
df <- df_sd_questions_wide %>% select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>% mutate(value = get(s))
#RAINCLOUD PLOT
x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.7) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
guides(
# y = guide_axis_manual(labels = paste(levels(df$QUESTION),ref_labels$left)),
y = guide_axis_manual(labels = rev(ref_labels$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
labs (title = title) +
theme_minimal()
rain_stimuli[[i]] <- x
ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_rain.png"), units = c("in"), width = 10, height = 14 )
}
#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_stimulus$`B1-1`
#ALSO plots_stimulus$`B1-1`[[1]]
names(box_stimuli) <- stimuli
names(rain_stimuli) <- stimuli
rm(x, i, plots)
#############################################################################
}
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
#################### ALL QUESTIONS AT CATEGORY #####################
# ONE PLOT FOR EACH CATEGORY WITH ALL QUESTIONS
#set questions to be graphed
categories <- unique(df_graphs$STIMULUS_CATEGORY)#created in wrangling block
box_category <- list()
rain_category <- list()
i = 0
for (c in categories){
i = i+1
# setup dataframe
title <- paste("STIMULUS CATEGORY ", c)
df <- df_graphs %>% filter(STIMULUS_CATEGORY == c)
#subset data cols
cols <- df %>% select( all_of(ref_sd_questions))
plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type = "S", split = FALSE, boxplot=TRUE))
#aggregate stimulus plots into block for question
x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
plot_annotation(
title = title,
subtitle =""
)
box_category[[i]] <- x
ggsave(plot = x, path="figs/all_q_by_category", filename =paste0(c,"_box.png"), units = c("in"), width = 10, height = 26)
#### RAINCLOUD PLOT
# setup dataframe
title <- paste("STIMULUS CATEGORY ", c)
df <- df_sd_questions_long %>% filter(STIMULUS_CATEGORY == c) %>% select(STIMULUS_CATEGORY, QUESTION, value, PID, Assigned.Block)
#RAINCLOUD PLOT
x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
stat_slab(normalize="groups", trim = TRUE, scale = 0.7) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
guides(
y = guide_axis_manual(labels = paste(rev(levels(df$QUESTION)),rev(ref_labels$left))),
y = guide_axis_manual(labels = rev(ref_labels$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
labs (title = title) +
theme_minimal()
rain_category[[i]] <- x
ggsave(plot = x, path="figs/all_q_by_category", filename =paste0(c,"_rain.png"), units = c("in"), width = 10, height = 26 )
}
#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_category$A
#ALSO plots_category$A[[1]]
names(box_category) <- categories
names(rain_category) <- categories
rm(x, i, plots)
}
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
####################CREATE SD PLOTS FOR EACH QUESTION#####################
#set questions to be graphed
questions <- ref_sd_questions #created in wrangling block
box_s_question <- list()
rain_s_question <- list()
i = 0
for (q in questions){
i = i+1
# setup dataframe
title <- paste(q)
df <- df_sd_questions_wide %>% filter(QUESTION == q)
#subset data cols
c <- df %>% select( all_of(ref_stimuli))
plots <- as.list(lapply(colnames(c), plot_sd, data = df, type = "Q", split = FALSE, boxplot=TRUE))
#aggregate stimulus plots into block for question
x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] / plots[[12]] / plots[[13]] / plots[[14]] /
plots[[15]] /plots[[16]] /plots[[17]] /plots[[18]] / plots[[19]] / plots[[20]] / plots[[21]] /
plots[[22]] /plots[[23]] /plots[[24]] /plots[[25]] +
plot_annotation(
title = title,
subtitle =""
)
box_s_question[[i]] <- x
ggsave(plot = x, path="figs/by_q_for_all_stimuli", filename =paste0(q,"_box.png"), units = c("in"), width = 10, height = 26)
############# RAINCLOUD
# setup dataframe
title <- paste(q)
df <- df_sd_questions_long %>% filter(QUESTION ==q)
#select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>% mutate(value = get(s))
left <- ref_labels[q,]$left
right <- ref_labels[q,]$right
#RAINCLOUD PLOT
x <- ggplot(df, aes(y = fct_rev(STIMULUS) , x = value, fill = fct_rev(STIMULUS))) +
stat_slab(normalize="groups", scale = 0.7) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) +
labs (title = title) +
theme_minimal()
rain_s_question[[i]] <- x
ggsave(plot = x, path="figs/by_q_for_all_stimuli", filename =paste0(q,"_rain.png"), units = c("in"), width = 10, height = 14 )
}
#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_s_question$MAKER_DESIGN
#ALSO plots_s_question$MAKER_DESIGN[[1]]
names(box_s_question) <- questions
names(rain_s_question) <- questions
rm(x, i, plots, left, right)
#############################################################################
}
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
####################SD PLOTS QUESTION BY CATEGORY #####################
#set grouping column
g = "STIMULUS_CATEGORY" #string name of column to group by
#set questions to be graphed
questions <- ref_sd_questions#created in wrangling block
box_c_question <- list()
rain_c_question <- list()
i = 0
for (q in questions){
i = i+1
left <- ref_labels[q,]$left
right <- ref_labels[q,]$right
# setup dataframe
title <- paste(q," BY CATEGORY")
df <- df_graphs
x <- multi_sd(df, left, right, x = q, y = g, color = g) +
labs(title = q)
box_c_question[[i]] <- x
ggsave(plot = x, path="figs/by_q_for_all_categories", filename =paste0(q,"_box.png"))
############RAINCLOUDS
# setup dataframe
title <- paste(q," BY CATEGORY")
df <- df_sd_questions_long %>% filter(QUESTION ==q)
left <- ref_labels[q,]$left
right <- ref_labels[q,]$right
#RAINCLOUD PLOT
x <- ggplot(df, aes(y = STIMULUS_CATEGORY , x = value, fill = STIMULUS_CATEGORY)) +
stat_slab(normalize="groups", scale = 0.7) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) +
labs (title = title) +
theme_minimal()
rain_c_question[[i]] <- x
ggsave(plot = x, path="figs/by_q_for_all_categories", filename =paste0(q,"_rain.png"), units = c("in"), width = 10, height = 14 )
}
#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_c_question$MAKER_DESIGN
#ALSO plots_c_question$MAKER_DESIGN[[1]]
names(box_c_question) <- questions
names(rain_c_question) <- questions
rm(x, i, right, left, df)
#############################################################################
}
wip code stash
#DISPLAY COLOR PALETTE
#display.brewer.pal(n = 8, name = 'Dark2')
#DISPLAY COLOR PALETTE
# paletteer_d("Redmonder::dPBIPuGn")
# #SET UP DATAFRAME
# df <- df_data
#
# ## DENSITY HISTOGRAPH
# ggplot(data = df_data, aes( x = duration.min, fill = Assigned.Block)) +
# geom_density(alpha = 0.5) +
# facet_grid(rows = vars(Assigned.Block)) +
# labs( x = "Survey Response Time (mins)",
# title = "TOTAL Response Time by Sample",
# subtitle = "(expect similiar across samples)") +
# easy_add_legend_title("Sample") +
# theme_minimal()
#
#
# ## RAINCLOUD
# ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
# stat_slab(aes(thickness = after_stat(pdf*n), alpha=0.5), scale = 0.7) +
# stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
# scale_fill_discrete(direction=-1)+
# xlim(0,225) +
# labs( x = "Survey Response Time (mins)", y="",
# title = "TOTAL Response Time by Sample",
# subtitle = "(expect similiar across samples)") +
# theme_minimal() +
# easy_remove_legend()
#
#
# ##RIDGEPLOT
# ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
# geom_density_ridges(scale=0.9) +
# # geom_boxplot()+
# stat_pointinterval()+
# theme_ridges() +
# scale_fill_discrete(direction=-1)+
# theme(legend.position = "none") +
# labs( x = "Survey Response Time (mins)", y="",
# title = "TOTAL Response Time by Sample",
# subtitle = "(expect similiar across samples)")
# s = "B0-0" #stimulus code
# t = "STIMULUS B0 — MILLENIAL PINK PLANTS" #STIMULUS TITLE
#
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == s)
#
#
# #BUILD SEMANTIC DIFFERENTIALS
# m_design <- single_sd(df, "professional", "layperson", x = MAKER_DESIGN) +
# labs(title = "MAKER-DESIGN") +
# easy_remove_legend()
#
# m_data <- single_sd(df, "professional", "layperson", x = MAKER_DATA) +
# labs(title = "MAKER-DATA") +
# easy_remove_legend()
#
# m_politics <- single_sd(df, "left-leaning", "right-leaning", x = MAKER_POLITIC) +
# labs(title = "MAKER-POLITICS") +
# easy_remove_legend()
#
# m_argue <- single_sd(df, "confrontational", "diplomatic", x = MAKER_ARGUE) +
# labs(title = "MAKER-ARGUE") +
# easy_remove_legend()
#
# m_selfish <- single_sd(df, "altruistic", "selfish", x = MAKER_SELF) +
# labs(title = "MAKER-SELFISH") +
# easy_remove_legend()
#
# m_align <- single_sd(df, "does NOT share", "does share", x = MAKER_ALIGN) +
# labs(title = "MAKER-ALIGNMENT") +
# easy_remove_legend()
#
# m_trust <- single_sd(df, "untrustworthy", "trustworthy", x = MAKER_TRUST) +
# labs(title = "MAKER-TRUST") +
# easy_remove_legend()
#
#
# #BUILD MASTER PLOT
# PLOT_maker <- (m_design / m_data / m_politics / m_argue / m_selfish / m_align / m_trust) +
# plot_annotation(
# title = t,
# subtitle =""
# )
#
# PLOT_maker
# s = "B0-0" #stimulus code
# t = "STIMULUS B0 — MILLENIAL PINK PLANTS" #STIMULUS TITLE
# g = "Assigned.Block"
# ##can also use y and color to split by additional variable
#
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == s)
#
# #BUILD SEMANTIC DIFFERENTIALS
# m_design <- multi_sd(df, "professional", "layperson", x = "MAKER_DESIGN", y = g, color = g) +
# labs(title = "MAKER-DESIGN") +
# easy_remove_legend()
#
# m_data <- multi_sd(df, "professional", "layperson", x = "MAKER_DATA", y = g, color = g) +
# labs(title = "MAKER-DATA") +
# easy_remove_legend()
#
# m_politics <- multi_sd(df, "left-leaning", "right-leaning", x = "MAKER_POLITIC", y = g, color = g) +
# labs(title = "MAKER-POLITICS") +
# easy_remove_legend()
#
# m_argue <- multi_sd(df, "confrontational", "diplomatic", x = "MAKER_ARGUE", y = g, color = g) +
# labs(title = "MAKER-ARGUE") +
# easy_remove_legend()
#
# m_selfish <- multi_sd(df, "altruistic", "selfish", x = "MAKER_SELF", y = g, color = g) +
# labs(title = "MAKER-SELFISH") +
# easy_remove_legend()
#
# m_align <- multi_sd(df, "does NOT share", "does share", x = "MAKER_ALIGN", y = g, color = g) +
# labs(title = "MAKER-ALIGNMENT") +
# easy_remove_legend()
#
# m_trust <- multi_sd(df, "untrustworthy", "trustworthy", x = "MAKER_TRUST", y = g, color = g) +
# labs(title = "MAKER-TRUST")
#
#
# #BUILD MASTER PLOT
# PLOT_maker <- (m_design / m_data / m_politics / m_argue / m_selfish / m_align / m_trust) +
# plot_annotation(
# title = t,
# subtitle =""
# )
#
# PLOT_maker
######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",n_blocks )
# rightside <-rep ("LAYPERSON",n_blocks )
# b00_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DESIGN")
# b00_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DATA")
# leftside <- rep ("LEFT-WING",n_blocks )
# rightside <-rep ("RIGHT-WING",n_blocks )
# b00_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-POLITICS")
# leftside <- rep ("CONFRONTATIONAL",n_blocks )
# rightside <-rep ("DIPLOMATIC",n_blocks )
# b00_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-ARGUE")
# leftside <- rep ("ALTRUSITC",n_blocks )
# rightside <-rep ("SELFISH",n_blocks )
# b00_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SELFISH")
#
# leftside <- rep ("Does NOT",n_blocks )
# rightside <-rep ("DOES",n_blocks )
# b00_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SHARES-MY-VALUES")
# leftside <- rep ("UNTRUSTWORTHY",n_blocks )
# rightside <-rep ("TRUSTWORTHY",n_blocks )
# b00_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# # easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-TRUST")
#
# PLOT_b0_maker <- (b00_m_design / b00_m_data / b00_m_politics / b00_m_argue / b00_m_selfish / b00_m_align / b00_m_trust) +
# plot_annotation(
# title = "STIMULUS B0 — MILLENIAL PINK PLANTS",
# subtitle =""
# )
#
# PLOT_b0_maker
###################################################
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
#
# ######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",1 )
# rightside <-rep ("LAYPERSON",1 )
# b_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DESIGN")
#
# b_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DATA")
#
#
# leftside <- rep ("LEFT-WING",1 )
# rightside <-rep ("RIGHT-WING",1 )
# b_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-POLITICS")
#
#
# leftside <- rep ("CONFRONTATIONAL",1 )
# rightside <-rep ("DIPLOMATIC",1 )
# b_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-ARGUE")
#
# leftside <- rep ("ALTRUSITC",1 )
# rightside <-rep ("SELFISH",1 )
# b_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SELFISH")
#
#
# leftside <- rep ("Does NOT",1 )
# rightside <-rep ("DOES",1 )
# b_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SHARES-MY-VALUES")
#
# leftside <- rep ("UNTRUSTWORTHY",1 )
# rightside <-rep ("TRUSTWORTHY",1 )
# b_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ""))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-TRUST")
#
#
# PLOT_b_maker <- (b_m_design / b_m_data / b_m_politics / b_m_argue / b_m_selfish / b_m_align / b_m_trust) +
# plot_annotation(
# title = paste(unique(df$STIMULUS), "MAKER")
# )
#
# PLOT_b_maker
###################################################
#
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
#
# ######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",1 )
# rightside <-rep ("LAYPERSON",1 )
# b_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DESIGN")
#
# b_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-DATA")
#
#
# leftside <- rep ("LEFT-WING",1 )
# rightside <-rep ("RIGHT-WING",1 )
# b_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-POLITICS")
#
#
# leftside <- rep ("CONFRONTATIONAL",1 )
# rightside <-rep ("DIPLOMATIC",1 )
# b_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-ARGUE")
#
# leftside <- rep ("ALTRUSITC",1 )
# rightside <-rep ("SELFISH",1 )
# b_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SELFISH")
#
#
# leftside <- rep ("Does NOT",1 )
# rightside <-rep ("DOES",1 )
# b_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-SHARES-MY-VALUES")
#
# leftside <- rep ("UNTRUSTWORTHY",1 )
# rightside <-rep ("TRUSTWORTHY",1 )
# b_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ID.Study, color = ID.Study))+
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# theme_minimal() +
# easy_remove_legend() +
# guides(
# y = guide_axis_manual(
# #breaks = vals,
# labels = leftside
# ),
# y.sec = guide_axis_manual(
# #breaks = vals,
# labels = rightside
# )) +
# labs(title = "MAKER-TRUST")
#
#
# PLOT_b_maker <- (b_m_design / b_m_data / b_m_politics / b_m_argue / b_m_selfish / b_m_align / b_m_trust) +
# plot_annotation(
# title = paste(unique(df$STIMULUS), "MAKER")
# )
#
# PLOT_b_maker
# ###################################################
# ####
# q = "MAKER_DESIGN"
# df <- df_questions %>% filter(QUESTION == q) %>%
# mutate(
# value = as.numeric(value),
# STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
# STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
# levels=c("0","1","2","3","4"),
# labels= c("F","A","B","C","D"))
# )
#
#
# g <- ggplot(df, aes(y = value, x="", color = STIMULUS_CATEGORY)) +
# geom_boxplot(width = 0.5)+
# geom_jitter(width = 0.1, alpha=0.3) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# facet_grid(df$STIMULUS_CATEGORY ~ .) +
# labs(title=df$STIMULUS_CATEGORY) +
# guides(
# y = guide_axis_manual(labels = ref_labels[q,"left"]),
# y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
# ) +
# theme_minimal() +
# labs (title = q)
#
# g
# left <- rep(ref_labels[q,"left"], length(unique(df$STIMULUS_CATEGORY)))
# right <- rep(ref_labels[q,"left"], length(unique(df$STIMULUS_CATEGORY)))
# a <- multi_sd(df, x = value, y = QUESTION, color = STIMULUS_CATEGORY, left=, right=right)
#
#
# multi_sd <- function (df, x, y, color, left,right) {
#
# g <- ggplot(df, aes(y = {{x}}, x = {{y}}, color = {{color}}))+
# geom_boxplot(width = 0.5) +
# geom_jitter(width = 0.1, alpha=0.5) +
# scale_y_continuous(limits=c(-1,101)) +
# facet_grid(rows=vars({{color}})) +
# labs(x="", y="") +
# coord_flip() +
# guides(
# y = guide_axis_manual(labels = left),
# y.sec = guide_axis_manual(labels = right)
# ) + theme_minimal()
#
# return(g)
# }
# ## GROUPED PLOTSD FUNCTION
# grouped_plotsd = function (data, x, type, q, boxplot) {
# ggplot(df, aes(y = .data[[x]], x="")) +
# {if(boxplot) geom_boxplot(width = 0.5) } +
# geom_jitter(width = 0.1, alpha=0.3) +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# {if(type == "Q")
# guides(
# y = guide_axis_manual(labels = ref_labels[q,"left"]),
# y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
# )} +
# theme_minimal() +
# labs (
# caption = column
# )
# }
#####################################################################
# # stim <- "B1-1"
#
# # setup dataframe
# title <- df_stimuli %>% filter(ID ==stim) %>% select(NAME)
# title <- paste(stim,"|",title)
# df <- df_graphs %>% filter(STIMULUS==stim)
#
# #subset data cols
# d <- df %>% select( all_of(questions))
# plots <- as.list(lapply(colnames(d), plotsd, data = df, boxplot=TRUE))
#
# ## PICK UP HERE, FIGURE OUT HOW TO LOOP OVER STIM
#
# x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
# plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
# plot_annotation(
# title = title,
# subtitle =""
# )
# x
# ggsave(plot = x, filename =paste(stim,".png"), units = c("in"), width = 10, height = 14 )
#CREATE MATRIX
# m <- matrix(nrow = length(stim), ncol = length(questions))
# rownames(m) <- stim
# colnames(m) <- questions